use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MLContextConversionUtil method matrixBlockToMatrixObject.
/**
 * Wrap an in-memory {@code MatrixBlock} in a newly created
 * {@code MatrixObject} that is described as binary block data.
 *
 * @param variableName
 *            name of the variable associated with the matrix (not read by
 *            this method)
 * @param matrixBlock
 *            matrix as a MatrixBlock
 * @param matrixMetadata
 *            the matrix metadata; when {@code null}, default matrix
 *            characteristics are used instead
 * @return the {@code MatrixBlock} converted to a {@code MatrixObject}
 */
public static MatrixObject matrixBlockToMatrixObject(String variableName, MatrixBlock matrixBlock, MatrixMetadata matrixMetadata) {
    try {
        // derive the characteristics from the metadata if there is any
        MatrixCharacteristics characteristics;
        if (matrixMetadata == null) {
            characteristics = new MatrixCharacteristics();
        } else {
            characteristics = matrixMetadata.asMatrixCharacteristics();
        }

        String tempFileName = OptimizerUtils.getUniqueTempFileName();
        MetaDataFormat format = new MetaDataFormat(characteristics,
                OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        MatrixObject result = new MatrixObject(ValueType.DOUBLE, tempFileName, format);

        // hand the block over to the matrix object and release it again
        result.acquireModify(matrixBlock);
        result.release();
        return result;
    } catch (DMLRuntimeException e) {
        throw new MLContextException("Exception converting MatrixBlock to MatrixObject", e);
    }
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MLContextConversionUtil method javaRDDStringIJVToFrameObject.
/**
 * Convert a {@code JavaRDD<String>} in IJV format to a {@code FrameObject}.
 * Note that metadata is required for IJV format.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata; required, must not be {@code null}
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
 * @throws NullPointerException
 *             if {@code frameMetadata} is {@code null}
 * @throws IllegalStateException
 *             if the text cells cannot be converted to binary blocks
 */
public static FrameObject javaRDDStringIJVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    // The frame schema is read from the metadata below, so fail fast with a
    // clear message instead of a bare NPE after the RDD transformations
    // have already been set up.
    if (frameMetadata == null) {
        throw new NullPointerException("Frame metadata is required to convert an IJV JavaRDD<String> to a FrameObject");
    }
    MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();

    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());

    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(),
            new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo),
            frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]));

    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        // the text cells are converted with an all-string schema that has
        // one entry per column of the matrix characteristics
        ValueType[] lschema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
        rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc(), javaPairRDDText, mc, lschema);
    } catch (DMLRuntimeException e) {
        // propagate the failure with its cause instead of printing the
        // stack trace and handing a null FrameObject back to the caller
        throw new IllegalStateException("Exception converting IJV JavaRDD<String> to FrameObject", e);
    }
    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class RewriteRemovePersistentReadWrite method rule_RemovePersistentDataOp.
/**
 * Turns persistent reads/writes of registered variables into transient
 * reads/writes. The inputs of the given hop are processed recursively
 * before the hop itself, and the hop is marked as visited afterwards.
 *
 * @param hop the hop to process
 */
private void rule_RemovePersistentDataOp(Hop hop) {
    // already processed hops are skipped
    if (hop.isVisited())
        return;

    // recursively process all inputs first
    ArrayList<Hop> children = hop.getInput();
    int pos = 0;
    while (pos < children.size()) {
        rule_RemovePersistentDataOp(children.get(pos));
        pos++;
    }

    // only data operators are subject to this rewrite
    if (hop instanceof DataOp) {
        DataOp dop = (DataOp) hop;
        switch (dop.getDataOpType()) {
            case PERSISTENTREAD: {
                if (!_inputs.contains(dop.getName())) {
                    LOG.warn("Non-registered persistent read of variable '" + dop.getName() + "' (line " + dop.getBeginLine() + ").");
                    break;
                }
                dop.setDataOpType(DataOpTypes.TRANSIENTREAD);
                if (hop.getDataType() == DataType.SCALAR)
                    dop.removeInput("iofilename");

                // disable unnecessary reblock of binary block w/ equal block sizes
                boolean hasFormatMeta = dop.requiresReblock()
                        && _inputsMeta.containsKey(dop.getName())
                        && _inputsMeta.get(dop.getName()) instanceof MetaDataFormat;
                if (hasFormatMeta) {
                    MetaDataFormat format = (MetaDataFormat) _inputsMeta.get(dop.getName());
                    MatrixCharacteristics dims = format.getMatrixCharacteristics();
                    boolean sameBlocksize = dims.getRowsPerBlock() == dop.getRowsInBlock()
                            && dims.getColsPerBlock() == dop.getColsInBlock();
                    boolean binaryBlock = format.getInputInfo() == InputInfo.BinaryBlockInputInfo;
                    // binary matrix w/ matching dims and frames do not require reblock
                    if (binaryBlock && (sameBlocksize || dop.getDataType() == DataType.FRAME))
                        dop.setRequiresReblock(false);
                }
                break;
            }
            case PERSISTENTWRITE: {
                if (!_outputs.contains(dop.getName())) {
                    LOG.warn("Non-registered persistent write of variable '" + dop.getName() + "' (line " + dop.getBeginLine() + ").");
                    break;
                }
                dop.setDataOpType(DataOpTypes.TRANSIENTWRITE);
                // the write takes over the block sizes of its first input
                dop.setRowsInBlock(dop.getInput().get(0).getRowsInBlock());
                dop.setColsInBlock(dop.getInput().get(0).getColsInBlock());
                if (hop.getDataType() == DataType.SCALAR)
                    dop.removeInput("iofilename");
                break;
            }
            default:
                // all other data op types are left untouched
        }
    }

    // mark processed
    hop.setVisited();
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class InterProceduralAnalysis method populateLocalVariableMapForFunctionCall.
/**
 * Fills the given variable map with one entry per function input parameter,
 * derived from the input hops of a function call: matrix inputs become
 * metadata-only matrix objects, literal scalars become scalar objects, and
 * scalar variables may be taken over from the caller's variable map.
 *
 * @param fstmt function statement providing the input parameters
 * @param fop function call operator providing the input hops and function key
 * @param callvars variable map of the caller, used to look up scalar variables
 * @param vars variable map to populate
 * @param fcallSizes function call size information (nnz safety, call counts)
 */
private static void populateLocalVariableMapForFunctionCall(FunctionStatement fstmt, FunctionOp fop, LocalVariableMap callvars, LocalVariableMap vars, FunctionCallSizeInfo fcallSizes) {
    ArrayList<DataIdentifier> params = fstmt.getInputParams();
    ArrayList<Hop> args = fop.getInput();
    String fkey = fop.getFunctionKey();

    for (int pos = 0; pos < params.size(); pos++) {
        // pair up the formal parameter with the hop passed at the same position
        DataIdentifier param = params.get(pos);
        Hop arg = args.get(pos);

        if (arg.getDataType() == DataType.MATRIX) {
            // propagate matrix characteristics; nnz only if it is safe to do so
            MatrixObject mo = new MatrixObject(ValueType.DOUBLE, null);
            long rows = arg.getDim1();
            long cols = arg.getDim2();
            int rowBlocksize = ConfigurationManager.getBlocksize();
            int colBlocksize = ConfigurationManager.getBlocksize();
            long nnz = fcallSizes.isSafeNnz(fkey, pos) ? arg.getNnz() : -1;
            MatrixCharacteristics dims = new MatrixCharacteristics(rows, cols, rowBlocksize, colBlocksize, nnz);
            mo.setMetaData(new MetaDataFormat(dims, null, null));
            vars.put(param.getName(), mo);
        } else if (arg.getDataType() == DataType.SCALAR) {
            // (for multiple calls, literal equivalence already checked)
            if (arg instanceof LiteralOp) {
                vars.put(param.getName(), ScalarObjectFactory.createScalarObject(arg.getValueType(), (LiteralOp) arg));
            } else if (PROPAGATE_SCALAR_VARS_INTO_FUN && fcallSizes.getFunctionCallCount(fkey) == 1 && arg instanceof DataOp) {
                // and input scalar is existing variable in symbol table
                Data scalar = callvars.get(arg.getName());
                if (scalar instanceof ScalarObject) {
                    vars.put(param.getName(), scalar);
                }
            }
        }
    }
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class Recompiler method checkCPReblock.
/**
 * CP reblock check for spark instructions; in contrast to MR, we can not
 * rely on the input file sizes because inputs might be passed via rdds.
 *
 * @param ec execution context
 * @param varin name of the variable to check
 * @return true if all checks for a CP reblock pass, false otherwise
 */
public static boolean checkCPReblock(ExecutionContext ec, String varin) {
    CacheableData<?> data = ec.getCacheableData(varin);
    MatrixCharacteristics dims = ec.getMatrixCharacteristics(varin);
    long nrow = dims.getRows();
    long ncol = dims.getCols();
    long nonZeros = dims.getNonZeros();

    // the recompilation hook is only valid with dynamic recompilation in
    // hybrid execution mode
    boolean hookEnabled = ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.isHybridExecutionMode();
    if (!hookEnabled) {
        return false;
    }

    // robustness for usage through mlcontext (key/values of input rdds are
    // not serializable for text; also bufferpool rdd read only supported for
    // binarycell and binaryblock)
    MetaDataFormat format = (MetaDataFormat) data.getMetaData();
    if (data.getRDDHandle() != null) {
        InputInfo iinfo = format.getInputInfo();
        if (iinfo != InputInfo.BinaryBlockInputInfo && iinfo != InputInfo.BinaryCellInputInfo) {
            return false;
        }
    }

    // robustness unknown dimensions, e.g., for csv reblock: decide based on
    // the size of the file on HDFS instead
    boolean unknownDims = nrow <= 0 || ncol <= 0;
    if (unknownDims) {
        try {
            long fileSize = MapReduceTool.getFilesizeOnHDFS(new Path(data.getFileName()));
            return (fileSize < OptimizerUtils.getLocalMemBudget() && fileSize < CP_CSV_REBLOCK_UNKNOWN_THRESHOLD_SIZE * OptimizerUtils.getParallelTextReadParallelism());
        } catch (IllegalArgumentException | IOException ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    // check valid dimensions and memory requirements
    double sparsity = OptimizerUtils.getSparsity(nrow, ncol, nonZeros);
    double memEstimate = MatrixBlock.estimateSizeInMemory(nrow, ncol, sparsity);
    boolean exceedsCP = !OptimizerUtils.isValidCPDimensions(nrow, ncol)
            || !OptimizerUtils.isValidCPMatrixSize(nrow, ncol, sparsity)
            || memEstimate >= OptimizerUtils.getLocalMemBudget();
    if (exceedsCP) {
        return false;
    }

    // check in-memory reblock size threshold (preference: distributed),
    // using a conservative estimate of the file size
    long estimatedFileSize = (long) (3.5 * memEstimate);
    long threshold = CP_REBLOCK_THRESHOLD_SIZE * OptimizerUtils.getParallelTextReadParallelism();
    return estimatedFileSize < threshold;
}
Aggregations