Usage of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml (Apache):
class MLContextConversionUtil, method javaRDDStringIJVToFrameObject.
/**
 * Convert a {@code JavaRDD<String>} in IJV format to a {@code FrameObject}.
 * Note that metadata is required for IJV format.
 *
 * @param javaRDD
 *            the Java RDD of strings in IJV format
 * @param frameMetadata
 *            frame metadata (dimensions and, optionally, the frame schema);
 *            if the schema is absent, all columns default to STRING
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
 */
public static FrameObject javaRDDStringIJVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
	JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
	MatrixCharacteristics mc = (frameMetadata != null)
			? frameMetadata.asMatrixCharacteristics()
			: new MatrixCharacteristics();
	JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
	// Derive the output frame schema from the metadata if present; otherwise fall
	// back to all-string columns. (Guarding both frameMetadata and its schema here
	// avoids the NPE the old code had when frameMetadata was null, despite the
	// null check used for the matrix characteristics above.)
	ValueType[] outSchema = (frameMetadata != null && frameMetadata.getFrameSchema() != null)
			? frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0])
			: UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
	FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(),
			new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), outSchema);
	JavaPairRDD<Long, FrameBlock> rdd;
	try {
		// the text-cell conversion itself always uses an all-string schema
		// (the IJV cells are parsed as strings; typed conversion happens later)
		ValueType[] lschema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
		rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc(), javaPairRDDText, mc, lschema);
	} catch (DMLRuntimeException e) {
		// propagate with full context instead of swallowing the exception
		// (printStackTrace + return null) as the previous version did
		throw new RuntimeException("Failed to convert IJV text RDD to binary-block frame", e);
	}
	frameObject.setRDDHandle(new RDDObject(rdd));
	return frameObject;
}
Usage of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml (Apache):
class Recompiler, method executeInMemoryFrameReblock.
/**
 * Performs an in-memory reblock of a frame variable by reading the input
 * frame through the buffer pool and handing its block to the output frame
 * object, which also updates the output matrix characteristics.
 *
 * @param ec     execution context providing the frame variables
 * @param varin  name of the input frame variable
 * @param varout name of the output frame variable
 */
public static void executeInMemoryFrameReblock(ExecutionContext ec, String varin, String varout) {
	FrameObject src = ec.getFrameObject(varin);
	FrameObject dst = ec.getFrameObject(varout);
	// pin the text input frame via the buffer pool; the frame object carries
	// all relevant information, including additional arguments for csv reblock
	FrameBlock block = src.acquireRead();
	// bind the block to the output (incl. update of matrix characteristics),
	// then unpin output before input, mirroring the acquire order
	dst.acquireModify(block);
	dst.release();
	src.release();
}
Usage of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml (Apache):
class Recompiler, method rUpdateStatistics.
/**
 * Recursively updates the size statistics (dimensions and, where available,
 * number of non-zeros) of all hops in the given DAG according to the current
 * symbol table entries. Handles several special cases in turn: transient
 * reads, persistent reads of unknown size, rand/seq data generation,
 * reshape, and right indexing.
 *
 * @param hop  root hop of the (sub-)DAG to update; no-op if already visited
 * @param vars current symbol table (variable name to data object)
 */
public static void rUpdateStatistics(Hop hop, LocalVariableMap vars) {
// visit status serves as memoization: each hop is processed at most once
if (hop.isVisited())
return;
// recursively process children
if (hop.getInput() != null)
for (Hop c : hop.getInput()) rUpdateStatistics(c, vars);
// tracks whether a size expression was derived in one of the branches below,
// in which case the generic refreshSizeInformation at the end is skipped
boolean updatedSizeExpr = false;
// update statistics of reads according to the current symbol table
// (with awareness not to override persistent reads to an existing name)
if (hop instanceof DataOp && ((DataOp) hop).getDataOpType() != DataOpTypes.PERSISTENTREAD) {
DataOp d = (DataOp) hop;
String varName = d.getName();
if (vars.keySet().contains(varName)) {
Data dat = vars.get(varName);
if (dat instanceof MatrixObject) {
MatrixObject mo = (MatrixObject) dat;
d.setDim1(mo.getNumRows());
d.setDim2(mo.getNumColumns());
d.setNnz(mo.getNnz());
} else if (dat instanceof FrameObject) {
FrameObject fo = (FrameObject) dat;
// for frames only dimensions are propagated (no nnz set here)
d.setDim1(fo.getNumRows());
d.setDim2(fo.getNumColumns());
}
}
} else // special case for persistent reads with unknown size (read-after-write)
if (hop instanceof DataOp && ((DataOp) hop).getDataOpType() == DataOpTypes.PERSISTENTREAD && !hop.dimsKnown() && ((DataOp) hop).getInputFormatType() != FileFormatTypes.CSV && !ConfigurationManager.getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA)) {
// update hop with read meta data
DataOp dop = (DataOp) hop;
tryReadMetaDataFileMatrixCharacteristics(dop);
} else // update size expression for rand/seq according to symbol table entries
if (hop instanceof DataGenOp) {
DataGenOp d = (DataGenOp) hop;
HashMap<String, Integer> params = d.getParamIndexMap();
if (d.getOp() == DataGenMethod.RAND || d.getOp() == DataGenMethod.SINIT || d.getOp() == DataGenMethod.SAMPLE) {
boolean initUnknown = !d.dimsKnown();
// input indexes of the rows/cols parameter expressions
int ix1 = params.get(DataExpression.RAND_ROWS);
int ix2 = params.get(DataExpression.RAND_COLS);
// update rows/cols by evaluating simple expression of literals, nrow, ncol, scalars, binaryops
HashMap<Long, Long> memo = new HashMap<>();
d.refreshRowsParameterInformation(d.getInput().get(ix1), vars, memo);
d.refreshColsParameterInformation(d.getInput().get(ix2), vars, memo);
// non-short-circuit '&' is behaviorally equivalent to '&&' here (plain booleans)
updatedSizeExpr = initUnknown & d.dimsKnown();
} else if (d.getOp() == DataGenMethod.SEQ) {
boolean initUnknown = !d.dimsKnown();
int ix1 = params.get(Statement.SEQ_FROM);
int ix2 = params.get(Statement.SEQ_TO);
int ix3 = params.get(Statement.SEQ_INCR);
HashMap<Long, Double> memo = new HashMap<>();
// Double.MAX_VALUE acts as the sentinel for "unknown at compile time",
// as evidenced by the guards below
double from = d.computeBoundsInformation(d.getInput().get(ix1), vars, memo);
double to = d.computeBoundsInformation(d.getInput().get(ix2), vars, memo);
double incr = d.computeBoundsInformation(d.getInput().get(ix3), vars, memo);
// special case increment: flip its sign if it points away from 'to'
if (from != Double.MAX_VALUE && to != Double.MAX_VALUE) {
incr *= ((from > to && incr > 0) || (from < to && incr < 0)) ? -1.0 : 1.0;
}
// the sequence length is only computable if all three bounds are known
if (from != Double.MAX_VALUE && to != Double.MAX_VALUE && incr != Double.MAX_VALUE) {
d.setDim1(UtilFunctions.getSeqLength(from, to, incr));
d.setDim2(1);
d.setIncrementValue(incr);
}
updatedSizeExpr = initUnknown & d.dimsKnown();
} else {
throw new DMLRuntimeException("Unexpected data generation method: " + d.getOp());
}
} else // update size expression for reshape according to symbol table entries
if (hop instanceof ReorgOp && ((ReorgOp) (hop)).getOp() == Hop.ReOrgOp.RESHAPE) {
ReorgOp d = (ReorgOp) hop;
boolean initUnknown = !d.dimsKnown();
HashMap<Long, Long> memo = new HashMap<>();
// inputs 1 and 2 hold the target rows/cols expressions of the reshape
d.refreshRowsParameterInformation(d.getInput().get(1), vars, memo);
d.refreshColsParameterInformation(d.getInput().get(2), vars, memo);
updatedSizeExpr = initUnknown & d.dimsKnown();
} else // update size expression for indexing according to symbol table entries
if (hop instanceof IndexingOp) {
IndexingOp iop = (IndexingOp) hop;
// inpRowL (row range lower bound)
Hop input2 = iop.getInput().get(1);
// inpRowU (row range upper bound)
Hop input3 = iop.getInput().get(2);
// inpColL (column range lower bound)
Hop input4 = iop.getInput().get(3);
// inpColU (column range upper bound)
Hop input5 = iop.getInput().get(4);
boolean initUnknown = !iop.dimsKnown();
HashMap<Long, Double> memo = new HashMap<>();
double rl = iop.computeBoundsInformation(input2, vars, memo);
double ru = iop.computeBoundsInformation(input3, vars, memo);
double cl = iop.computeBoundsInformation(input4, vars, memo);
double cu = iop.computeBoundsInformation(input5, vars, memo);
// the range bounds are inclusive, hence the +1 on the output dimensions
if (rl != Double.MAX_VALUE && ru != Double.MAX_VALUE)
iop.setDim1((long) (ru - rl + 1));
if (cl != Double.MAX_VALUE && cu != Double.MAX_VALUE)
iop.setDim2((long) (cu - cl + 1));
updatedSizeExpr = initUnknown & iop.dimsKnown();
}
// propagate statistics along inner nodes of the DAG,
// without overwriting inferred size expressions
if (!updatedSizeExpr) {
hop.refreshSizeInformation();
}
hop.setVisited();
}
Usage of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml (Apache):
class ExecutionContext, method setFrameOutput.
/**
 * Binds the given frame block to the frame variable of the given name:
 * pins the block into the variable's frame object, unpins it, and
 * re-registers the variable in the symbol table.
 *
 * @param varName    name of the frame variable to set
 * @param outputData frame block to bind to that variable
 */
public void setFrameOutput(String varName, FrameBlock outputData) {
	FrameObject target = getFrameObject(varName);
	target.acquireModify(outputData);
	target.release();
	setVariable(varName, target);
}
Usage of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml (Apache):
class SparkExecutionContext, method getBroadcastForFrameVariable.
/**
 * Obtains a partitioned broadcast of the given frame variable, reusing an
 * existing valid broadcast handle if available; otherwise creates a new
 * partitioned broadcast from the pinned frame block, registers it on the
 * frame object, and accounts for its size in the broadcast size statistics.
 *
 * @param varname name of the frame variable
 * @return partitioned broadcast of the frame block
 */
@SuppressWarnings("unchecked")
public PartitionedBroadcast<FrameBlock> getBroadcastForFrameVariable(String varname) {
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
FrameObject fo = getFrameObject(varname);
PartitionedBroadcast<FrameBlock> bret = null;
// reuse existing broadcast handle
if (fo.getBroadcastHandle() != null && fo.getBroadcastHandle().isValid()) {
bret = fo.getBroadcastHandle().getBroadcast();
}
// create new broadcast handle (never created, evicted)
if (bret == null) {
// account for overwritten invalid broadcast (e.g., evicted)
if (fo.getBroadcastHandle() != null)
CacheableData.addBroadcastSize(-fo.getBroadcastHandle().getSize());
// obtain meta data for frame: block sizes for partitioning
// (for frames the full column count forms a single column block)
int bclen = (int) fo.getNumColumns();
int brlen = OptimizerUtils.getDefaultFrameSize();
// create partitioned frame block and release memory consumed by input
FrameBlock mb = fo.acquireRead();
PartitionedBlock<FrameBlock> pmb = new PartitionedBlock<>(mb, brlen, bclen);
fo.release();
// determine coarse-grained partitioning of the blocks over broadcasts
int numPerPart = PartitionedBroadcast.computeBlocksPerPartition(fo.getNumRows(), fo.getNumColumns(), brlen, bclen);
int numParts = (int) Math.ceil((double) pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() / numPerPart);
Broadcast<PartitionedBlock<FrameBlock>>[] ret = new Broadcast[numParts];
// create coarse-grained partitioned broadcasts
if (numParts > 1) {
for (int i = 0; i < numParts; i++) {
int offset = i * numPerPart;
// last partition may hold fewer blocks than numPerPart
int numBlks = Math.min(numPerPart, pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() - offset);
PartitionedBlock<FrameBlock> tmp = pmb.createPartition(offset, numBlks, new FrameBlock());
ret[i] = getSparkContext().broadcast(tmp);
// free the local copy once shipped (unless running in local master,
// where the broadcast shares the same JVM objects)
if (!isLocalMaster())
tmp.clearBlocks();
}
} else {
// single partition: broadcast the partitioned block as-is
ret[0] = getSparkContext().broadcast(pmb);
if (!isLocalMaster())
pmb.clearBlocks();
}
bret = new PartitionedBroadcast<>(ret, fo.getMatrixCharacteristics());
// register the new handle on the frame object and track its size
BroadcastObject<FrameBlock> bchandle = new BroadcastObject<>(bret, OptimizerUtils.estimatePartitionedSizeExactSparsity(fo.getMatrixCharacteristics()));
fo.setBroadcastHandle(bchandle);
CacheableData.addBroadcastSize(bchandle.getSize());
}
// maintain statistics on broadcast time and count if enabled
if (DMLScript.STATISTICS) {
Statistics.accSparkBroadCastTime(System.nanoTime() - t0);
Statistics.incSparkBroadcastCount(1);
}
return bret;
}
Aggregations