use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class ResultMergeLocalMemory method createNewMatrixObject.
private MatrixObject createNewMatrixObject(MatrixBlock data) {
ValueType vt = _output.getValueType();
MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
MatrixObject moNew = new MatrixObject(vt, _outputFName);
// create deep copy of metadata obj
MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
OutputInfo oiOld = metadata.getOutputInfo();
InputInfo iiOld = metadata.getInputInfo();
MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
mc.setNonZeros(data.getNonZeros());
MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
moNew.setMetaData(meta);
// adjust dense/sparse representation
data.examSparsity();
// release new output
moNew.acquireModify(data);
moNew.release();
return moNew;
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class ResultMergeRemoteMR method executeParallelMerge.
@Override
public MatrixObject executeParallelMerge(int par) {
// always create new matrix object (required for nested parallelism)
MatrixObject moNew = null;
if (LOG.isTraceEnabled())
LOG.trace("ResultMerge (remote, mr): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
try {
// collect all relevant inputs
Collection<String> srcFnames = new LinkedList<>();
ArrayList<MatrixObject> inMO = new ArrayList<>();
for (MatrixObject in : _inputs) {
// check for empty inputs (no iterations executed)
if (in != null && in != _output) {
// ensure that input file resides on disk
in.exportData();
// add to merge list
srcFnames.add(in.getFileName());
inMO.add(in);
}
}
if (!srcFnames.isEmpty()) {
// ensure that outputfile (for comparison) resides on disk
_output.exportData();
// actual merge
MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
String fnameCompare = _output.getFileName();
if (mcOld.getNonZeros() == 0)
// no compare required
fnameCompare = null;
executeMerge(fnameCompare, _outputFName, srcFnames.toArray(new String[0]), metadata.getInputInfo(), metadata.getOutputInfo(), mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
// create new output matrix (e.g., to prevent potential export<->read file access conflict
moNew = new MatrixObject(_output.getValueType(), _outputFName);
OutputInfo oiOld = metadata.getOutputInfo();
InputInfo iiOld = metadata.getInputInfo();
MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
mc.setNonZeros(_isAccum ? -1 : computeNonZeros(_output, inMO));
MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
moNew.setMetaData(meta);
} else {
// return old matrix, to prevent copy
moNew = _output;
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
return moNew;
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class ParForProgramBlock method executeRemoteMRParForDP.
private void executeRemoteMRParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
/* Step 0) check and recompile MR inst
* Step 1) serialize child PB and inst
* Step 2) create and serialize tasks
* Step 3) submit MR Jobs and wait for results
* Step 4) collect results from each parallel worker
*/
Timing time = (_monitor ? new Timing(true) : null);
// Step 0) check and compile to CP (if forced remote parfor)
boolean flagForced = checkMRAndRecompileToCP(0);
// Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
MatrixObject inputMatrix = ec.getMatrixObject(_colocatedDPMatrix);
PartitionFormat inputDPF = sb.determineDataPartitionFormat(_colocatedDPMatrix);
// mark matrix var as partitioned
inputMatrix.setPartitioned(inputDPF._dpf, inputDPF._N);
// Step 2) init parallel workers (serialize PBs)
// NOTES: each mapper changes filenames with regard to his ID as we submit a single
// job, cannot reuse serialized string, since variables are serialized as well.
ParForBody body = new ParForBody(_childBlocks, _resultVars, ec);
String program = ProgramConverter.serializeParForBody(body);
if (_monitor)
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, time.stop());
// Step 3) create tasks
TaskPartitioner partitioner = createTaskPartitioner(from, to, incr);
String resultFile = constructResultFileName();
long numIterations = partitioner.getNumIterations();
// partitioner.createTasks().size();
long numCreatedTasks = numIterations;
if (_monitor)
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, time.stop());
// write matrices to HDFS
exportMatricesToHDFS(ec);
// Step 4) submit MR job (wait for finished work)
OutputInfo inputOI = ((inputMatrix.getSparsity() < 0.1 && inputDPF == PartitionFormat.COLUMN_WISE) || (inputMatrix.getSparsity() < 0.001 && inputDPF == PartitionFormat.ROW_WISE)) ? OutputInfo.BinaryCellOutputInfo : OutputInfo.BinaryBlockOutputInfo;
RemoteParForJobReturn ret = RemoteDPParForMR.runJob(_ID, _iterPredVar, _colocatedDPMatrix, program, resultFile, inputMatrix, inputDPF, inputOI, _tSparseCol, _enableCPCaching, _numThreads, _replicationDP);
if (_monitor)
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, time.stop());
// Step 5) collecting results from each parallel worker
int numExecutedTasks = ret.getNumExecutedTasks();
int numExecutedIterations = ret.getNumExecutedIterations();
// consolidate results into global symbol table
consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, ret.getVariables());
if (// see step 0
flagForced)
releaseForcedRecompile(0);
inputMatrix.unsetPartitioned();
if (_monitor) {
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, time.stop());
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
}
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class ParForProgramBlock method executeRemoteSparkParForDP.
private void executeRemoteSparkParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
Timing time = (_monitor ? new Timing(true) : null);
// Step 0) check and compile to CP (if forced remote parfor)
boolean flagForced = checkMRAndRecompileToCP(0);
// Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
MatrixObject inputMatrix = ec.getMatrixObject(_colocatedDPMatrix);
PartitionFormat inputDPF = sb.determineDataPartitionFormat(_colocatedDPMatrix);
// mark matrix var as partitioned
inputMatrix.setPartitioned(inputDPF._dpf, inputDPF._N);
// Step 2) init parallel workers (serialize PBs)
// NOTES: each mapper changes filenames with regard to his ID as we submit a single
// job, cannot reuse serialized string, since variables are serialized as well.
ParForBody body = new ParForBody(_childBlocks, _resultVars, ec);
HashMap<String, byte[]> clsMap = new HashMap<>();
String program = ProgramConverter.serializeParForBody(body, clsMap);
if (_monitor)
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, time.stop());
// Step 3) create tasks
TaskPartitioner partitioner = createTaskPartitioner(from, to, incr);
String resultFile = constructResultFileName();
long numIterations = partitioner.getNumIterations();
// partitioner.createTasks().size();
long numCreatedTasks = numIterations;
if (_monitor)
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, time.stop());
// write matrices to HDFS, except DP matrix which is the input to the RemoteDPParForSpark job
exportMatricesToHDFS(ec, _colocatedDPMatrix);
// Step 4) submit MR job (wait for finished work)
// TODO runtime support for binary cell partitioning
OutputInfo inputOI = OutputInfo.BinaryBlockOutputInfo;
RemoteParForJobReturn ret = RemoteDPParForSpark.runJob(_ID, _iterPredVar, _colocatedDPMatrix, program, clsMap, resultFile, inputMatrix, ec, inputDPF, inputOI, _tSparseCol, _enableCPCaching, _numThreads);
if (_monitor)
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, time.stop());
// Step 5) collecting results from each parallel worker
int numExecutedTasks = ret.getNumExecutedTasks();
int numExecutedIterations = ret.getNumExecutedIterations();
// consolidate results into global symbol table
consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, ret.getVariables());
if (// see step 0
flagForced)
releaseForcedRecompile(0);
inputMatrix.unsetPartitioned();
if (_monitor) {
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, time.stop());
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
}
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class VariableCPInstruction method parseInstruction.
public static VariableCPInstruction parseInstruction(String str) {
String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
String opcode = parts[0];
VariableOperationCode voc = getVariableOperationCode(opcode);
if (voc == VariableOperationCode.CreateVariable) {
if (// && parts.length != 10 )
parts.length < 5)
throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
} else if (voc == VariableOperationCode.MoveVariable) {
// mvvar tempA A; or mvvar mvar5 "data/out.mtx" "binary"
if (parts.length != 3 && parts.length != 4)
throw new DMLRuntimeException("Invalid number of operands in mvvar instruction: " + str);
} else if (voc == VariableOperationCode.Write) {
// Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
if (parts.length != 5 && parts.length != 8)
throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
} else {
if (voc != VariableOperationCode.RemoveVariable)
// no output
InstructionUtils.checkNumFields(parts, getArity(voc));
}
CPOperand in1 = null, in2 = null, in3 = null, in4 = null, out = null;
switch(voc) {
case CreateVariable:
// variable name
DataType dt = DataType.valueOf(parts[4]);
ValueType vt = dt == DataType.MATRIX ? ValueType.DOUBLE : ValueType.STRING;
int extSchema = (dt == DataType.FRAME && parts.length >= 13) ? 1 : 0;
in1 = new CPOperand(parts[1], vt, dt);
// file name
in2 = new CPOperand(parts[2], ValueType.STRING, DataType.SCALAR);
// file name override flag (always literal)
in3 = new CPOperand(parts[3], ValueType.BOOLEAN, DataType.SCALAR);
// format
String fmt = parts[5];
if (fmt.equalsIgnoreCase("csv")) {
// 14 inputs: createvar corresponding to READ -- includes properties hasHeader, delim, fill, and fillValue
if (parts.length < 15 + extSchema || parts.length > 17 + extSchema)
throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
} else {
if (parts.length != 6 && parts.length != 12 + extSchema)
throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
}
OutputInfo oi = OutputInfo.stringToOutputInfo(fmt);
InputInfo ii = OutputInfo.getMatchingInputInfo(oi);
MatrixCharacteristics mc = new MatrixCharacteristics();
if (parts.length == 6) {
// do nothing
} else if (parts.length >= 11) {
// matrix characteristics
mc.setDimension(Long.parseLong(parts[6]), Long.parseLong(parts[7]));
mc.setBlockSize(Integer.parseInt(parts[8]), Integer.parseInt(parts[9]));
mc.setNonZeros(Long.parseLong(parts[10]));
} else {
throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
}
MetaDataFormat iimd = new MetaDataFormat(mc, oi, ii);
UpdateType updateType = UpdateType.COPY;
if (parts.length >= 12)
updateType = UpdateType.valueOf(parts[11].toUpperCase());
// handle frame schema
String schema = (dt == DataType.FRAME && parts.length >= 13) ? parts[parts.length - 1] : null;
if (fmt.equalsIgnoreCase("csv")) {
// Cretevar instructions for CSV format either has 13 or 14 inputs.
// 13 inputs: createvar corresponding to WRITE -- includes properties hasHeader, delim, and sparse
// 14 inputs: createvar corresponding to READ -- includes properties hasHeader, delim, fill, and fillValue
FileFormatProperties fmtProperties = null;
if (parts.length == 15 + extSchema) {
boolean hasHeader = Boolean.parseBoolean(parts[12]);
String delim = parts[13];
boolean sparse = Boolean.parseBoolean(parts[14]);
fmtProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
} else {
boolean hasHeader = Boolean.parseBoolean(parts[12]);
String delim = parts[13];
boolean fill = Boolean.parseBoolean(parts[14]);
double fillValue = UtilFunctions.parseToDouble(parts[15]);
String naStrings = null;
if (parts.length == 17 + extSchema)
naStrings = parts[16];
fmtProperties = new CSVFileFormatProperties(hasHeader, delim, fill, fillValue, naStrings);
}
return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, updateType, fmtProperties, schema, opcode, str);
} else {
return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, updateType, schema, opcode, str);
}
case AssignVariable:
in1 = new CPOperand(parts[1]);
in2 = new CPOperand(parts[2]);
break;
case CopyVariable:
// Value types are not given here
in1 = new CPOperand(parts[1], ValueType.UNKNOWN, DataType.UNKNOWN);
in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
break;
case MoveVariable:
in1 = new CPOperand(parts[1], ValueType.UNKNOWN, DataType.UNKNOWN);
in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
if (parts.length > 3)
in3 = new CPOperand(parts[3], ValueType.UNKNOWN, DataType.UNKNOWN);
break;
case RemoveVariable:
VariableCPInstruction rminst = new VariableCPInstruction(getVariableOperationCode(opcode), null, null, null, out, opcode, str);
for (int i = 1; i < parts.length; i++) rminst.addInput(new CPOperand(parts[i], ValueType.UNKNOWN, DataType.SCALAR));
return rminst;
case RemoveVariableAndFile:
in1 = new CPOperand(parts[1]);
in2 = new CPOperand(parts[2]);
// second argument must be a boolean
if (in2.getValueType() != ValueType.BOOLEAN)
throw new DMLRuntimeException("Unexpected value type for second argument in: " + str);
break;
case CastAsScalarVariable:
case CastAsMatrixVariable:
case CastAsFrameVariable:
case CastAsDoubleVariable:
case CastAsIntegerVariable:
case CastAsBooleanVariable:
// first operand is a variable name => string value type
in1 = new CPOperand(parts[1]);
// output variable name
out = new CPOperand(parts[2]);
break;
case Write:
in1 = new CPOperand(parts[1]);
in2 = new CPOperand(parts[2]);
in3 = new CPOperand(parts[3]);
FileFormatProperties fprops = null;
if (in3.getName().equalsIgnoreCase("csv")) {
boolean hasHeader = Boolean.parseBoolean(parts[4]);
String delim = parts[5];
boolean sparse = Boolean.parseBoolean(parts[6]);
fprops = new CSVFileFormatProperties(hasHeader, delim, sparse);
// description
in4 = new CPOperand(parts[7]);
} else {
fprops = new FileFormatProperties();
// description
in4 = new CPOperand(parts[4]);
}
VariableCPInstruction inst = new VariableCPInstruction(getVariableOperationCode(opcode), in1, in2, in3, out, null, fprops, null, null, opcode, str);
inst.addInput(in4);
return inst;
case Read:
in1 = new CPOperand(parts[1]);
in2 = new CPOperand(parts[2]);
out = null;
break;
case SetFileName:
// variable name
in1 = new CPOperand(parts[1]);
// file name
in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
// option: remote or local
in3 = new CPOperand(parts[3], ValueType.UNKNOWN, DataType.UNKNOWN);
// return new VariableCPInstruction(getVariableOperationCode(opcode), in1, in2, in3, str);
break;
}
return new VariableCPInstruction(getVariableOperationCode(opcode), in1, in2, in3, out, opcode, str);
}
Aggregations