Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
In class ProgramConverter, method parseDataObject:
/**
 * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
 * related symbol tables can be hierarchically structured.
 *
 * @param in data object as string
 * @return two-element array: [0] variable name, [1] reconstructed Data object
 */
public static Object[] parseDataObject(String in) {
    Object[] ret = new Object[2];
    StringTokenizer st = new StringTokenizer(in, DATA_FIELD_DELIM);
    String name = st.nextToken();
    DataType datatype = DataType.valueOf(st.nextToken());
    ValueType valuetype = ValueType.valueOf(st.nextToken());
    String valString = st.hasMoreTokens() ? st.nextToken() : "";
    Data dat = null;
    switch (datatype) {
        case SCALAR: {
            switch (valuetype) {
                case INT:
                    dat = new IntObject(Long.parseLong(valString));
                    break;
                case DOUBLE:
                    dat = new DoubleObject(Double.parseDouble(valString));
                    break;
                case BOOLEAN:
                    dat = new BooleanObject(Boolean.parseBoolean(valString));
                    break;
                case STRING:
                    dat = new StringObject(valString);
                    break;
                default:
                    throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
            }
            break;
        }
        case MATRIX: {
            MatrixObject mo = new MatrixObject(valuetype, valString);
            long rows = Long.parseLong(st.nextToken());
            long cols = Long.parseLong(st.nextToken());
            int brows = Integer.parseInt(st.nextToken());
            int bcols = Integer.parseInt(st.nextToken());
            long nnz = Long.parseLong(st.nextToken());
            InputInfo iin = InputInfo.stringToInputInfo(st.nextToken());
            OutputInfo oin = OutputInfo.stringToOutputInfo(st.nextToken());
            PartitionFormat partFormat = PartitionFormat.valueOf(st.nextToken());
            UpdateType inplace = UpdateType.valueOf(st.nextToken());
            MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, brows, bcols, nnz);
            MetaDataFormat md = new MetaDataFormat(mc, oin, iin);
            mo.setMetaData(md);
            if (partFormat._dpf != PDataPartitionFormat.NONE)
                mo.setPartitioned(partFormat._dpf, partFormat._N);
            mo.setUpdateType(inplace);
            dat = mo;
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
    }
    ret[0] = name;
    ret[1] = dat;
    return ret;
}
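A hedged usage sketch of the parser above. The input string is produced by ProgramConverter's corresponding serialization routines; serializedVar below is a hypothetical placeholder, not a literal format specification:

// Hypothetical round trip: parse a variable previously serialized by
// ProgramConverter; positions in the returned array follow the method above.
Object[] parsed = ProgramConverter.parseDataObject(serializedVar);
String varName = (String) parsed[0]; // [0] variable name
Data varData = (Data) parsed[1];     // [1] reconstructed Data object
if (varData instanceof ScalarObject)
    System.out.println(varName + " = " + ((ScalarObject) varData).getStringValue());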
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
In class ResultMergeLocalFile, method createNewMatrixObject:
private MatrixObject createNewMatrixObject(MatrixObject output, ArrayList<MatrixObject> inMO) {
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(_output.getValueType(), _outputFName);
    // create deep copy of metadata obj
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
    mc.setNonZeros(_isAccum ? -1 : computeNonZeros(output, inMO));
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    return moNew;
}
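The deep copy of the metadata matters because the merged output gets its own non-zero count; mutating the shared MatrixCharacteristics would corrupt the source object's metadata. A minimal sketch of the pattern, reusing the names from the method above:

// Sketch: copy the characteristics so that setNonZeros on the copy leaves the
// source metadata untouched; nnz < 0 conventionally marks the count unknown.
MatrixCharacteristics mcCopy = new MatrixCharacteristics(mcOld); // copy constructor
mcCopy.setNonZeros(-1); // unknown until recomputed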
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
In class ResultMergeLocalMemory, method createNewMatrixObject:
private MatrixObject createNewMatrixObject(MatrixBlock data) {
    ValueType vt = _output.getValueType();
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(vt, _outputFName);
    // create deep copy of metadata obj
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
    mc.setNonZeros(data.getNonZeros());
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    // adjust dense/sparse representation
    data.examSparsity();
    // set new output data and release it to the buffer pool
    moNew.acquireModify(data);
    moNew.release();
    return moNew;
}
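The acquireModify/release pair at the end hands the in-memory block over to SystemML's buffer pool. A minimal sketch of that handover pattern; the file name and value type are illustrative, and meta and block stand in for a prepared MetaDataFormat and MatrixBlock:

// Buffer-pool handover (sketch): acquireModify pins the object and installs
// the block as its new, dirty in-memory data; release unpins it so the cache
// may evict or lazily export it later via exportData().
MatrixObject mo = new MatrixObject(ValueType.DOUBLE, "/tmp/example.bin"); // illustrative path
mo.setMetaData(meta);    // metadata must be in place before the first acquire
mo.acquireModify(block); // pin + set data, marks the object dirty
mo.release();            // unpin; eviction/export handled by the cache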
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
In class ResultMergeRemoteMR, method executeParallelMerge:
@Override
public MatrixObject executeParallelMerge(int par) {
    // always create new matrix object (required for nested parallelism)
    MatrixObject moNew = null;
    if (LOG.isTraceEnabled())
        LOG.trace("ResultMerge (remote, mr): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    try {
        // collect all relevant inputs
        Collection<String> srcFnames = new LinkedList<>();
        ArrayList<MatrixObject> inMO = new ArrayList<>();
        for (MatrixObject in : _inputs) {
            // check for empty inputs (no iterations executed)
            if (in != null && in != _output) {
                // ensure that input file resides on disk
                in.exportData();
                // add to merge list
                srcFnames.add(in.getFileName());
                inMO.add(in);
            }
        }
        if (!srcFnames.isEmpty()) {
            // ensure that output file (for comparison) resides on disk
            _output.exportData();
            // actual merge
            MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
            MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
            String fnameCompare = _output.getFileName();
            if (mcOld.getNonZeros() == 0)
                fnameCompare = null; // no compare required
            executeMerge(fnameCompare, _outputFName, srcFnames.toArray(new String[0]), metadata.getInputInfo(), metadata.getOutputInfo(), mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
            // create new output matrix (e.g., to prevent potential export<->read file access conflicts)
            moNew = new MatrixObject(_output.getValueType(), _outputFName);
            OutputInfo oiOld = metadata.getOutputInfo();
            InputInfo iiOld = metadata.getInputInfo();
            MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
            mc.setNonZeros(_isAccum ? -1 : computeNonZeros(_output, inMO));
            MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
            moNew.setMetaData(meta);
        } else {
            // return old matrix, to prevent copy
            moNew = _output;
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    return moNew;
}
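The fnameCompare logic above reflects ParFor's result-merge semantics: all workers start from a common original, and a worker's cell value wins only where it differs from that original. A conceptual, SystemML-independent sketch of the per-cell rule (mergeCell and its signature are illustrative, not part of the codebase):

// Conceptual per-cell merge with compare (sketch): disjoint writes guarantee
// that at most one worker changed any given cell relative to the original.
static double mergeCell(double original, double[] workerValues) {
    for (double v : workerValues)
        if (v != original)
            return v; // the single differing value wins
    return original;  // no worker changed this cell
}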
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
In class ResultMergeRemoteSpark, method executeMerge:
@SuppressWarnings("unchecked")
protected RDDObject executeMerge(MatrixObject compare, MatrixObject[] inputs, long rlen, long clen, int brlen, int bclen) {
    String jobname = "ParFor-RMSP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sec = (SparkExecutionContext) _ec;
    boolean withCompare = (compare != null);
    RDDObject ret = null;
    // determine degree of parallelism
    int numRed = (int) determineNumReducers(rlen, clen, brlen, bclen, _numReducers);
    // sanity check for empty src files
    if (inputs == null || inputs.length == 0)
        throw new DMLRuntimeException("Execute merge should never be called with no inputs.");
    try {
        // note: initial implementation via union over all result rdds discarded due to
        // stack overflow errors with many parfor tasks, and thus many rdds
        // Step 1: construct input rdd from all result files of parfor workers
        // a) construct job conf with all files
        InputInfo ii = InputInfo.BinaryBlockInputInfo;
        JobConf job = new JobConf(ResultMergeRemoteMR.class);
        job.setJobName(jobname);
        job.setInputFormat(ii.inputFormatClass);
        Path[] paths = new Path[inputs.length];
        for (int i = 0; i < paths.length; i++) {
            // ensure input exists on hdfs (e.g., if in-memory or RDD)
            inputs[i].exportData();
            paths[i] = new Path(inputs[i].getFileName());
            // update rdd handle to allow lazy evaluation by guarding
            // against cleanup of temporary result files
            setRDDHandleForMerge(inputs[i], sec);
        }
        FileInputFormat.setInputPaths(job, paths);
        // b) create rdd from input files w/ deep copy of keys and blocks
        JavaPairRDD<MatrixIndexes, MatrixBlock> rdd = sec.getSparkContext().hadoopRDD(job, ii.inputFormatClass, ii.inputKeyClass, ii.inputValueClass).mapPartitionsToPair(new CopyBlockPairFunction(true), true);
        // Step 2a: merge with compare
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
        if (withCompare) {
            JavaPairRDD<MatrixIndexes, MatrixBlock> compareRdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) sec.getRDDHandleForMatrixObject(compare, InputInfo.BinaryBlockInputInfo);
            // merge values which differ from compare values
            ResultMergeRemoteSparkWCompare cfun = new ResultMergeRemoteSparkWCompare(_isAccum);
            out = rdd.groupByKey(numRed) // group all result blocks per key
                .join(compareRdd)        // join compare block and result blocks
                .mapToPair(cfun);        // merge result blocks w/ compare
        } else {
            // Step 2b: merge without compare
            // direct merge in any order (disjointness guaranteed)
            out = _isAccum ? RDDAggregateUtils.sumByKeyStable(rdd, false) : RDDAggregateUtils.mergeByKey(rdd, false);
        }
        // Step 3: create output rdd handle w/ lineage
        ret = new RDDObject(out);
        for (int i = 0; i < paths.length; i++)
            ret.addLineageChild(inputs[i].getRDDHandle());
        if (withCompare)
            ret.addLineageChild(compare.getRDDHandle());
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }
    return ret;
}
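The mapPartitionsToPair(new CopyBlockPairFunction(true), true) step exists because Hadoop RecordReaders reuse the same Writable key/value instances across records, so caching or shuffling the raw pairs would alias a single mutable object. A hedged sketch of an equivalent deep copy; the MatrixIndexes(long, long) constructor and MatrixBlock.copy are assumed here:

// Sketch: break the aliasing of reused Writables by deep-copying each pair
// before any caching or shuffle; CopyBlockPairFunction plays this role above.
JavaPairRDD<MatrixIndexes, MatrixBlock> safe = rdd.mapToPair(kv -> {
    MatrixIndexes ix = new MatrixIndexes(kv._1().getRowIndex(), kv._1().getColumnIndex());
    MatrixBlock mb = new MatrixBlock();
    mb.copy(kv._2()); // deep copy of the possibly reused block
    return new scala.Tuple2<>(ix, mb);
});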