Example usage from the project incubator-systemml by Apache.
Class FrameObject, method readBlobFromRDD.
/**
 * Collects the frame block for this object from the given RDD handle.
 * The frame dimensions come from the matrix characteristics stored in {@code _metaData};
 * when {@code _schema} is null an all-STRING schema is substituted.
 *
 * @param rdd RDD handle to collect from
 * @param status return-status holder; NOTE(review): not referenced anywhere in the visible body - confirm against the full source
 * @return the collected frame block
 * @throws IOException if the collect raises a DMLRuntimeException or produces a null block
 */
protected FrameBlock readBlobFromRDD(RDDObject rdd, MutableBoolean status) throws IOException {
// note: the read of a frame block from an RDD might trigger
// lazy evaluation of pending transformations.
RDDObject lrdd = rdd;
// prepare return status (by default only collect)
// NOTE(review): no statement follows the comment above in this listing; a status update appears to be missing - confirm
MetaDataFormat iimd = (MetaDataFormat) _metaData;
MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
// dimensions are narrowed from the values returned by getRows()/getCols() to int
int rlen = (int) mc.getRows();
int clen = (int) mc.getCols();
// handle missing schema if necessary
// fallback schema: STRING for every column, with at least one entry even if clen < 1
ValueType[] lschema = (_schema != null) ? _schema : UtilFunctions.nCopies(clen >= 1 ? (int) clen : 1, ValueType.STRING);
FrameBlock fb = null;
try {
// prevent unnecessary collect through rdd checkpoint
// when short-circuiting is allowed, collect from the first lineage child instead of rdd itself
if (rdd.allowsShortCircuitCollect()) {
lrdd = (RDDObject) rdd.getLineageChilds().get(0);
// collect frame block from binary block RDD
fb = SparkExecutionContext.toFrameBlock(lrdd, lschema, rlen, clen);
} catch (DMLRuntimeException ex) {
// surface runtime failures through the declared checked exception, keeping the cause
throw new IOException(ex);
// sanity check correct output
if (fb == null)
throw new IOException("Unable to load frame from rdd.");
return fb;
Example usage from the project incubator-systemml by Apache.
Class SparkExecutionContext, method getBroadcastForFrameVariable.
/**
 * Returns a partitioned broadcast of the frame bound to the given variable name.
 * An existing broadcast handle is reused when it is present and valid; otherwise the
 * frame is read, split into a partitioned block and broadcast in one or more partitions.
 *
 * @param varname name of the frame variable to broadcast
 * @return partitioned broadcast of the frame's blocks
 */
public PartitionedBroadcast<FrameBlock> getBroadcastForFrameVariable(String varname) {
// start timestamp is only taken when statistics are enabled (0 otherwise)
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
FrameObject fo = getFrameObject(varname);
PartitionedBroadcast<FrameBlock> bret = null;
// reuse existing broadcast handle
if (fo.getBroadcastHandle() != null && fo.getBroadcastHandle().isValid()) {
bret = fo.getBroadcastHandle().getBroadcast();
// create new broadcast handle (never created, evicted)
if (bret == null) {
// account for overwritten invalid broadcast (e.g., evicted)
// NOTE(review): the statement guarded by the following if is not present in this listing - confirm against the full source
if (fo.getBroadcastHandle() != null)
// obtain meta data for frame
// block width is the full column count; block height is the default frame size
int bclen = (int) fo.getNumColumns();
int brlen = OptimizerUtils.getDefaultFrameSize();
// create partitioned frame block and release memory consumed by input
// NOTE(review): no release of the acquired read is visible in this listing - confirm
FrameBlock mb = fo.acquireRead();
PartitionedBlock<FrameBlock> pmb = new PartitionedBlock<>(mb, brlen, bclen);
// determine coarse-grained partitioning
int numPerPart = PartitionedBroadcast.computeBlocksPerPartition(fo.getNumRows(), fo.getNumColumns(), brlen, bclen);
// number of partitions = total block count divided by blocks per partition, rounded up
int numParts = (int) Math.ceil((double) pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() / numPerPart);
Broadcast<PartitionedBlock<FrameBlock>>[] ret = new Broadcast[numParts];
// create coarse-grained partitioned broadcasts
if (numParts > 1) {
for (int i = 0; i < numParts; i++) {
int offset = i * numPerPart;
// the last partition may hold fewer than numPerPart blocks
int numBlks = Math.min(numPerPart, pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() - offset);
PartitionedBlock<FrameBlock> tmp = pmb.createPartition(offset, numBlks, new FrameBlock());
ret[i] = getSparkContext().broadcast(tmp);
// NOTE(review): the body of this non-local-master check is not present in this listing - confirm
if (!isLocalMaster())
} else {
// single partition
ret[0] = getSparkContext().broadcast(pmb);
// NOTE(review): the body of this non-local-master check is not present in this listing - confirm
if (!isLocalMaster())
bret = new PartitionedBroadcast<>(ret, fo.getMatrixCharacteristics());
// NOTE(review): bchandle is constructed but not visibly stored on the frame object here - confirm
BroadcastObject<FrameBlock> bchandle = new BroadcastObject<>(bret, OptimizerUtils.estimatePartitionedSizeExactSparsity(fo.getMatrixCharacteristics()));
// adds the time elapsed since t0 to the Spark broadcast time statistic;
// NOTE(review): t0 is 0 when statistics are disabled and no guard is visible here - confirm
Statistics.accSparkBroadCastTime(System.nanoTime() - t0);
return bret;
Example usage from the project incubator-systemml by Apache.
Class SparkExecutionContext, method getRDDHandleForFrameObject.
/**
 * FIXME: currently this implementation assumes matrix representations but uses a frame
 * signature in order to support the old transform implementation.
 *
 * @param fo frame object
 * @param inputInfo input info
 * @return JavaPairRDD handle for a frame object
 */
public JavaPairRDD<?, ?> getRDDHandleForFrameObject(FrameObject fo, InputInfo inputInfo) {
// Obtains an RDD for the given frame object from one of three sources: an existing
// RDD handle, the in-memory frame block, or the frame's file.
// NOTE: MB this logic should be integrated into FrameObject
// However, for now we cannot assume that spark libraries are
// always available and hence only store generic references in
// matrix object while all the logic is in the SparkExecContext
// the generic binary-block input info is mapped to its frame-specific variant; other formats pass through
InputInfo inputInfo2 = (inputInfo == InputInfo.BinaryBlockInputInfo) ? InputInfo.BinaryBlockFrameInputInfo : inputInfo;
JavaSparkContext sc = getSparkContext();
JavaPairRDD<?, ?> rdd = null;
// rdd operations if already executed and cached
// CASE 1: an RDD handle exists and is either a checkpoint RDD or the frame is not cached
if (fo.getRDDHandle() != null && (fo.getRDDHandle().isCheckpointRDD() || !fo.isCached(false))) {
// return existing rdd handling (w/o input format change)
rdd = fo.getRDDHandle().getRDD();
} else // CASE 2: dirty in memory data or cached result of rdd operations
if (fo.isDirty() || fo.isCached(false)) {
// get in-memory matrix block and parallelize it
// w/ guarded parallelize (fallback to export, rdd from file if too large)
MatrixCharacteristics mc = fo.getMatrixCharacteristics();
boolean fromFile = false;
// file fallback is taken when the collect memory budget check fails or the size reservation is refused
if (!OptimizerUtils.checkSparkCollectMemoryBudget(mc, 0) || !_parRDDs.reserve(OptimizerUtils.estimatePartitionedSizeExactSparsity(mc))) {
// NOTE(review): the statement guarded by the dirty check below is not present in this listing - confirm
if (fo.isDirty()) {
// write only if necessary
rdd = sc.hadoopFile(fo.getFileName(), inputInfo2.inputFormatClass, inputInfo2.inputKeyClass, inputInfo2.inputValueClass);
// cp is workaround for read bug
rdd = ((JavaPairRDD<LongWritable, FrameBlock>) rdd).mapToPair(new CopyFrameBlockPairFunction());
fromFile = true;
} else {
// default case
// pin frame in memory
FrameBlock fb = fo.acquireRead();
rdd = toFrameJavaPairRDD(sc, fb);
// unpin frame
// NOTE(review): no unpin call is visible here, and the first argument of registerRDD below is empty in this listing - confirm against the full source
_parRDDs.registerRDD(, OptimizerUtils.estimatePartitionedSizeExactSparsity(mc), true);
// keep rdd handle for future operations on it
// NOTE(review): rddhandle is constructed but not visibly attached to the frame object - confirm
RDDObject rddhandle = new RDDObject(rdd);
} else // CASE 3: non-dirty (file exists on HDFS)
// For binary block, these are: SequenceFileInputFormat.class, MatrixIndexes.class, MatrixBlock.class
if (inputInfo2 == InputInfo.BinaryBlockFrameInputInfo) {
rdd = sc.hadoopFile(fo.getFileName(), inputInfo2.inputFormatClass, inputInfo2.inputKeyClass, inputInfo2.inputValueClass);
// note: this copy is still required in Spark 1.4 because spark hands out whatever the inputformat
// recordreader returns; the javadoc explicitly recommend to copy all key/value pairs
// cp is workaround for read bug
rdd = ((JavaPairRDD<LongWritable, FrameBlock>) rdd).mapToPair(new CopyFrameBlockPairFunction());
} else if (inputInfo2 == InputInfo.TextCellInputInfo || inputInfo2 == InputInfo.CSVInputInfo || inputInfo2 == InputInfo.MatrixMarketInputInfo) {
// text-based formats are read as (LongWritable, Text) pairs and copied the same way
rdd = sc.hadoopFile(fo.getFileName(), inputInfo2.inputFormatClass, inputInfo2.inputKeyClass, inputInfo2.inputValueClass);
// cp is workaround for read bug
rdd = ((JavaPairRDD<LongWritable, Text>) rdd).mapToPair(new CopyTextInputFunction());
} else if (inputInfo2 == InputInfo.BinaryCellInputInfo) {
throw new DMLRuntimeException("Binarycell not supported for frames.");
} else {
throw new DMLRuntimeException("Incorrect input format in getRDDHandleForVariable");
// keep rdd handle for future operations on it
// NOTE(review): rddhandle is constructed but not visibly attached to the frame object - confirm
RDDObject rddhandle = new RDDObject(rdd);
return rdd;
Example usage from the project incubator-systemml by Apache.
Class ByteBuffer, method deserializeBlock.
/**
 * Produces the cache block held by this buffer: a newly created block built from the
 * byte data when the buffer is not shallow, or the stored block reference otherwise.
 *
 * @return a new MatrixBlock/FrameBlock (non-shallow case) or {@code _cdata} (shallow case)
 * @throws IOException declared by the signature; no throwing call is visible in this listing
 */
public CacheBlock deserializeBlock() throws IOException {
CacheBlock ret = null;
if (!_shallow) {
// sparse matrix / string frame
// input over the serialized bytes: CacheDataInput for matrices, a plain DataInputStream for frames
DataInput din = _matrix ? new CacheDataInput(_bdata) : new DataInputStream(new ByteArrayInputStream(_bdata));
ret = _matrix ? new MatrixBlock() : new FrameBlock();
// NOTE(review): din is created but no read from it into ret is visible in this listing - confirm against the full source
} else {
// dense matrix/frame
// shallow case: hand back the stored block reference directly
ret = _cdata;
return ret;
Example usage from the project incubator-systemml by Apache.
Class ParameterizedBuiltinCPInstruction, method processInstruction.
/**
 * Executes this parameterized builtin instruction by dispatching on its opcode
 * (compared case-insensitively). Each branch reads its arguments from {@code params},
 * obtains its inputs from the execution context and binds the result to {@code output}.
 *
 * @param ec execution context used to obtain inputs and set outputs
 * @throws DMLRuntimeException for an unsupported rmempty margin, a failed transform
 *         metadata read, a toString target that is neither matrix nor frame, or an unknown opcode
 */
public void processInstruction(ExecutionContext ec) {
String opcode = getOpcode();
ScalarObject sores = null;
// cdf / invcdf: evaluate the operator's function on the parameter map and emit a double scalar
if (opcode.equalsIgnoreCase("cdf")) {
SimpleOperator op = (SimpleOperator) _optr;
double result = op.fn.execute(params);
sores = new DoubleObject(result);
ec.setScalarOutput(output.getName(), sores);
} else if (opcode.equalsIgnoreCase("invcdf")) {
SimpleOperator op = (SimpleOperator) _optr;
double result = op.fn.execute(params);
sores = new DoubleObject(result);
ec.setScalarOutput(output.getName(), sores);
} else if (opcode.equalsIgnoreCase("groupedagg")) {
// acquire locks
MatrixBlock target = ec.getMatrixInput(params.get(Statement.GAGG_TARGET), getExtendedOpcode());
MatrixBlock groups = ec.getMatrixInput(params.get(Statement.GAGG_GROUPS), getExtendedOpcode());
// weights are optional and stay null when the parameter is absent
MatrixBlock weights = null;
if (params.get(Statement.GAGG_WEIGHTS) != null)
weights = ec.getMatrixInput(params.get(Statement.GAGG_WEIGHTS), getExtendedOpcode());
// -1 is passed on when no group count is given
int ngroups = -1;
if (params.get(Statement.GAGG_NUM_GROUPS) != null) {
// the value is parsed as a double and then truncated to int
ngroups = (int) Double.parseDouble(params.get(Statement.GAGG_NUM_GROUPS));
// compute the result
// num threads
int k = Integer.parseInt(params.get("k"));
MatrixBlock soresBlock = groups.groupedAggOperations(target, weights, new MatrixBlock(), ngroups, _optr, k);
ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
// release locks
// local references are cleared before the inputs are released
target = groups = weights = null;
ec.releaseMatrixInput(params.get(Statement.GAGG_TARGET), getExtendedOpcode());
ec.releaseMatrixInput(params.get(Statement.GAGG_GROUPS), getExtendedOpcode());
if (params.get(Statement.GAGG_WEIGHTS) != null)
ec.releaseMatrixInput(params.get(Statement.GAGG_WEIGHTS), getExtendedOpcode());
} else if (opcode.equalsIgnoreCase("rmempty")) {
// only the margins "rows" and "cols" are accepted
String margin = params.get("margin");
if (!(margin.equals("rows") || margin.equals("cols")))
throw new DMLRuntimeException("Unspupported margin identifier '" + margin + "'.");
// acquire locks
MatrixBlock target = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
// the select input is optional
MatrixBlock select = params.containsKey("select") ? ec.getMatrixInput(params.get("select"), getExtendedOpcode()) : null;
// compute the result
// NOTE(review): "empty.return" is dereferenced without a null check, so it must be present - confirm callers always set it
boolean emptyReturn = Boolean.parseBoolean(params.get("empty.return").toLowerCase());
MatrixBlock soresBlock = target.removeEmptyOperations(new MatrixBlock(), margin.equals("rows"), emptyReturn, select);
// release locks
ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
if (params.containsKey("select"))
ec.releaseMatrixInput(params.get("select"), getExtendedOpcode());
} else if (opcode.equalsIgnoreCase("replace")) {
// acquire locks
MatrixBlock target = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
// compute the result
// pattern and replacement are both parsed as doubles
double pattern = Double.parseDouble(params.get("pattern"));
double replacement = Double.parseDouble(params.get("replacement"));
MatrixBlock ret = (MatrixBlock) target.replaceOperations(new MatrixBlock(), pattern, replacement);
// release locks
ec.setMatrixOutput(output.getName(), ret, getExtendedOpcode());
ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
} else if (opcode.equalsIgnoreCase("rexpand")) {
// acquire locks
MatrixBlock target = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
// compute the result
double maxVal = Double.parseDouble(params.get("max"));
// dirVal is true only when "dir" equals "rows"
boolean dirVal = params.get("dir").equals("rows");
boolean cast = Boolean.parseBoolean(params.get("cast"));
boolean ignore = Boolean.parseBoolean(params.get("ignore"));
int numThreads = Integer.parseInt(params.get("k"));
MatrixBlock ret = (MatrixBlock) target.rexpandOperations(new MatrixBlock(), maxVal, dirVal, cast, ignore, numThreads);
// release locks
ec.setMatrixOutput(output.getName(), ret, getExtendedOpcode());
ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
} else if (opcode.equalsIgnoreCase("transformapply")) {
// acquire locks
FrameBlock data = ec.getFrameInput(params.get("target"));
FrameBlock meta = ec.getFrameInput(params.get("meta"));
String[] colNames = data.getColumnNames();
// compute transformapply
// the encoder is built from the "spec" parameter, the data's column names and the meta frame
Encoder encoder = EncoderFactory.createEncoder(params.get("spec"), colNames, data.getNumColumns(), meta);
MatrixBlock mbout = encoder.apply(data, new MatrixBlock(data.getNumRows(), data.getNumColumns(), false));
// release locks
// NOTE(review): no release of the two frame inputs is visible in this listing - confirm against the full source
ec.setMatrixOutput(output.getName(), mbout, getExtendedOpcode());
} else if (opcode.equalsIgnoreCase("transformdecode")) {
// acquire locks
MatrixBlock data = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
FrameBlock meta = ec.getFrameInput(params.get("meta"));
String[] colnames = meta.getColumnNames();
// compute transformdecode
Decoder decoder = DecoderFactory.createDecoder(getParameterMap().get("spec"), colnames, null, meta);
FrameBlock fbout = decoder.decode(data, new FrameBlock(decoder.getSchema()));
// output column names are the leading meta column names, one per output column
fbout.setColumnNames(Arrays.copyOfRange(colnames, 0, fbout.getNumColumns()));
// release locks
// NOTE(review): only the matrix input is visibly released; no release of the meta frame appears in this listing - confirm
ec.setFrameOutput(output.getName(), fbout);
ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
} else if (opcode.equalsIgnoreCase("transformcolmap")) {
// acquire locks
FrameBlock meta = ec.getFrameInput(params.get("target"));
String[] colNames = meta.getColumnNames();
// compute transformapply
// the encoder is created without a meta frame (null); the output has one row per meta column and 3 columns
Encoder encoder = EncoderFactory.createEncoder(params.get("spec"), colNames, meta.getNumColumns(), null);
MatrixBlock mbout = encoder.getColMapping(meta, new MatrixBlock(meta.getNumColumns(), 3, false));
// release locks
// NOTE(review): no release of the frame input is visible in this listing - confirm
ec.setMatrixOutput(output.getName(), mbout, getExtendedOpcode());
} else if (opcode.equalsIgnoreCase("transformmeta")) {
// get input spec and path
String spec = getParameterMap().get("spec");
String path = getParameterMap().get(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_MTD);
// the separator defaults to TfUtils.TXMTD_SEP when "sep" is not given
String delim = getParameterMap().containsKey("sep") ? getParameterMap().get("sep") : TfUtils.TXMTD_SEP;
// execute transform meta data read
FrameBlock meta = null;
try {
meta = TfMetaUtils.readTransformMetaDataFromFile(spec, path, delim);
} catch (Exception ex) {
// any failure of the metadata read is rethrown as a DMLRuntimeException with the cause attached
throw new DMLRuntimeException(ex);
// release locks
ec.setFrameOutput(output.getName(), meta);
} else if (opcode.equalsIgnoreCase("toString")) {
// handle input parameters
// each option falls back to its TOSTRING_* constant when the parameter is absent
int rows = (getParam("rows") != null) ? Integer.parseInt(getParam("rows")) : TOSTRING_MAXROWS;
int cols = (getParam("cols") != null) ? Integer.parseInt(getParam("cols")) : TOSTRING_MAXCOLS;
int decimal = (getParam("decimal") != null) ? Integer.parseInt(getParam("decimal")) : TOSTRING_DECIMAL;
boolean sparse = (getParam("sparse") != null) ? Boolean.parseBoolean(getParam("sparse")) : TOSTRING_SPARSE;
String separator = (getParam("sep") != null) ? getParam("sep") : TOSTRING_SEPARATOR;
String lineseparator = (getParam("linesep") != null) ? getParam("linesep") : TOSTRING_LINESEPARATOR;
// get input matrix/frame and convert to string
CacheableData<?> data = ec.getCacheableData(getParam("target"));
String out = null;
// NOTE(review): no release matching either acquireRead below is visible in this listing - confirm
if (data instanceof MatrixObject) {
MatrixBlock matrix = (MatrixBlock) data.acquireRead();
warnOnTrunction(matrix, rows, cols);
out = DataConverter.toString(matrix, sparse, separator, lineseparator, rows, cols, decimal);
} else if (data instanceof FrameObject) {
FrameBlock frame = (FrameBlock) data.acquireRead();
warnOnTrunction(frame, rows, cols);
out = DataConverter.toString(frame, sparse, separator, lineseparator, rows, cols, decimal);
} else {
throw new DMLRuntimeException("toString only converts matrix or frames to string");
// the rendered text is bound to the output variable as a string scalar
ec.setScalarOutput(output.getName(), new StringObject(out));
} else {
// no branch matched the opcode
throw new DMLRuntimeException("Unknown opcode : " + opcode);