use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.
Example from the class MergedMRJobInstruction, method constructJobReturn.
/**
 * Extracts the partial job return of a single instruction out of the merged
 * job return that covers all piggybacked instructions.
 *
 * @param instID instruction id whose output slice should be extracted
 * @param retAll merged job return of all instructions
 * @return a job return holding only this instruction's output meta data
 */
public JobReturn constructJobReturn(long instID, JobReturn retAll) {
    // lookup the output offset and slice length registered for this instruction
    int offset = outIxOffs.get(instID);
    int length = outIxLens.get(instID);
    // assemble the partial job return; meta data is copied only on success
    // (on failure retAll.metadata may be absent/irrelevant)
    JobReturn partialRet = new JobReturn();
    partialRet.successful = retAll.successful;
    if (partialRet.successful) {
        partialRet.metadata = new MetaData[length];
        System.arraycopy(retAll.metadata, offset, partialRet.metadata, 0, length);
    }
    return partialRet;
}
use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.
Example from the class RunMRJobs, method executeInMemoryReblockOperations.
/**
 * Executes the reblock instructions of an MR job directly in the control
 * program (CP) via the caching framework, i.e., without submitting an MR job.
 *
 * @param inst the MR job instruction (provides result indices and output infos)
 * @param shuffleInst serialized reblock instructions to execute
 * @param inputMatrices input matrix objects, indexed by instruction input id
 * @param outputMatrices output matrix objects, aligned with the result indices
 * @return a successful job return with the characteristics of all outputs
 */
private static JobReturn executeInMemoryReblockOperations(MRJobInstruction inst, String shuffleInst, MatrixObject[] inputMatrices, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] mcOut = new MatrixCharacteristics[outputMatrices.length];
    ReblockInstruction[] reblocks = MRInstructionParser.parseReblockInstructions(shuffleInst);
    byte[] resultIx = inst.getIv_resultIndices();
    for (ReblockInstruction reblock : reblocks) {
        // CP Reblock through caching framework (no copy required: same data, next op copies)
        MatrixBlock inBlock = inputMatrices[reblock.input].acquireRead();
        // bind the same in-memory block to every output mapped to this instruction
        for (int i = 0; i < resultIx.length; i++) {
            if (reblock.output == resultIx[i]) {
                outputMatrices[i].acquireModify(inBlock);
                outputMatrices[i].release();
                mcOut[i] = new MatrixCharacteristics(inBlock.getNumRows(), inBlock.getNumColumns(), reblock.brlen, reblock.bclen, inBlock.getNonZeros());
            }
        }
        inputMatrices[reblock.input].release();
    }
    return new JobReturn(mcOut, inst.getOutputInfos(), true);
}
use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.
Example from the class RunMRJobs, method prepareAndSubmitJob.
/**
 * Wrapper for submitting MR job instructions incl preparation and actual submission.
 * The preparation includes (1) pulling stats out of symbol table and populating the
 * instruction, (2) instruction patching, and (3) export of in-memory matrices if
 * required.
 *
 * Furthermore, this wrapper also provides a hook for runtime piggybacking to intercept
 * concurrent job submissions in order to collect and merge instructions.
 *
 * @param inst instruction
 * @param ec execution context
 * @return job status
 */
public static JobReturn prepareAndSubmitJob(MRJobInstruction inst, ExecutionContext ec) {
    // Obtain references to all input matrices
    MatrixObject[] inputMatrices = inst.extractInputMatrices(ec);
    // note: for REBLOCK postponed until we know if necessary
    if (!(inst.getJobType() == JobType.REBLOCK)) {
        // export matrices (dirty in-memory data or RDD-backed data to HDFS)
        for (MatrixObject m : inputMatrices) {
            if (m.isDirty() || m.getRDDHandle() != null)
                m.exportData();
        }
        // check input files
        checkEmptyInputs(inst, inputMatrices);
    }
    // Obtain references to all output matrices
    inst.extractOutputMatrices(ec);
    // obtain original state (restored in finally below)
    String rdInst = inst.getIv_randInstructions();
    String rrInst = inst.getIv_recordReaderInstructions();
    String mapInst = inst.getIv_instructionsInMapper();
    String shuffleInst = inst.getIv_shuffleInstructions();
    String aggInst = inst.getIv_aggInstructions();
    String otherInst = inst.getIv_otherInstructions();
    // variable patching (replace placeholders with variables)
    inst.setIv_randInstructions(updateLabels(rdInst, ec.getVariables()));
    inst.setIv_recordReaderInstructions(updateLabels(rrInst, ec.getVariables()));
    inst.setIv_instructionsInMapper(updateLabels(mapInst, ec.getVariables()));
    inst.setIv_shuffleInstructions(updateLabels(shuffleInst, ec.getVariables()));
    inst.setIv_aggInstructions(updateLabels(aggInst, ec.getVariables()));
    inst.setIv_otherInstructions(updateLabels(otherInst, ec.getVariables()));
    try {
        // runtime piggybacking if applicable
        if (OptimizerUtils.ALLOW_RUNTIME_PIGGYBACKING && RuntimePiggybacking.isActive() && RuntimePiggybacking.isSupportedJobType(inst.getJobType())) {
            return RuntimePiggybacking.submitJob(inst);
        }
        return submitJob(inst);
    } finally {
        // reset original state even if submission failed, so the shared
        // instruction is never left with patched placeholder labels
        // (previously the restore was skipped on exceptions)
        inst.setIv_randInstructions(rdInst);
        inst.setIv_recordReaderInstructions(rrInst);
        inst.setIv_instructionsInMapper(mapInst);
        inst.setIv_shuffleInstructions(shuffleInst);
        inst.setIv_aggInstructions(aggInst);
        inst.setIv_otherInstructions(otherInst);
    }
}
use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.
Example from the class RunMRJobs, method executeInMemoryDataGenOperations.
/**
 * Executes the data-generation instructions (rand/seq) of an MR job directly
 * in the control program (CP), i.e., without submitting an MR job.
 *
 * @param inst the MR job instruction (provides result indices and output infos)
 * @param randInst serialized data-gen instructions to execute
 * @param outputMatrices output matrix objects, aligned with the result indices
 * @return a successful job return with the characteristics of all outputs
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] mcOut = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] dataGenSet = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIx = inst.getIv_resultIndices();
    for (DataGenMRInstruction dgInst : dataGenSet) {
        if (dgInst instanceof RandInstruction) {
            // CP rand: materialize the random block once, bind to all mapped outputs
            RandInstruction rand = (RandInstruction) dgInst;
            RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(rand.getProbabilityDensityFunction(), (int) rand.getRows(), (int) rand.getCols(), rand.getRowsInBlock(), rand.getColsInBlock(), rand.getSparsity(), rand.getMinValue(), rand.getMaxValue(), rand.getPdfParams());
            MatrixBlock mb = MatrixBlock.randOperations(rgen, rand.getSeed());
            for (int i = 0; i < resultIx.length; i++) {
                if (rand.output == resultIx[i]) {
                    outputMatrices[i].acquireModify(mb);
                    outputMatrices[i].release();
                    mcOut[i] = new MatrixCharacteristics(mb.getNumRows(), mb.getNumColumns(), rand.getRowsInBlock(), rand.getColsInBlock(), mb.getNonZeros());
                }
            }
        }
        else if (dgInst instanceof SeqInstruction) {
            // CP seq: materialize the sequence block once, bind to all mapped outputs
            SeqInstruction seq = (SeqInstruction) dgInst;
            MatrixBlock mb = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);
            for (int i = 0; i < resultIx.length; i++) {
                if (seq.output == resultIx[i]) {
                    outputMatrices[i].acquireModify(mb);
                    outputMatrices[i].release();
                    mcOut[i] = new MatrixCharacteristics(mb.getNumRows(), mb.getNumColumns(), seq.getRowsInBlock(), seq.getColsInBlock(), mb.getNonZeros());
                }
            }
        }
    }
    return new JobReturn(mcOut, inst.getOutputInfos(), true);
}
use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.
Example from the class RunMRJobs, method submitJob.
/**
 * Submits an MR job instruction, without modifying any state of that instruction.
 *
 * @param inst instruction
 * @return job status
 */
public static JobReturn submitJob(MRJobInstruction inst) {
    JobReturn ret = new JobReturn();
    MatrixObject[] inputMatrices = inst.getInputMatrices();
    MatrixObject[] outputMatrices = inst.getOutputMatrices();
    boolean execCP = false; // set if the job was short-circuited to in-memory CP execution
    // Spawn MapReduce Jobs
    try {
        // replace all placeholders in all instructions with appropriate values
        String rdInst = inst.getIv_randInstructions();
        String rrInst = inst.getIv_recordReaderInstructions();
        String mapInst = inst.getIv_instructionsInMapper();
        String shuffleInst = inst.getIv_shuffleInstructions();
        String aggInst = inst.getIv_aggInstructions();
        String otherInst = inst.getIv_otherInstructions();
        boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
        switch(inst.getJobType()) {
            case GMR:
            case GMRCELL:
                ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), inst.getPartitioned(), inst.getPformats(), inst.getPsizes(), rrInst, mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATAGEN:
                // prefer in-memory CP datagen if dynamic recompilation allows it
                if (ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPDataGen(inst, rdInst)) {
                    ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR), mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                }
                break;
            case CM_COV:
                ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case GROUPED_AGG:
                ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case REBLOCK:
            case CSV_REBLOCK:
                // prefer in-memory CP reblock if dynamic recompilation allows it
                if (ConfigurationManager.isDynamicRecompilation() && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPReblock(inst, inputMatrices)) {
                    ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    // export dirty matrices to HDFS (initially deferred)
                    for (MatrixObject m : inputMatrices) {
                        if (m.isDirty())
                            m.exportData();
                    }
                    checkEmptyInputs(inst, inputMatrices);
                    if (inst.getJobType() == JobType.REBLOCK) {
                        ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), getNNZ(inputMatrices), mapInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    } else if (inst.getJobType() == JobType.CSV_REBLOCK) {
                        ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                }
                break;
            case CSV_WRITE:
                // FIX: previously passed getBclens() twice, dropping the row-block size;
                // all sibling call sites pass (brlens, bclens) in this position
                ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs());
                break;
            case MMCJ:
                ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0]);
                break;
            case MMRJ:
                ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case SORT:
                // an empty mapper instruction indicates a sort with weights
                boolean weightsflag = true;
                if (!mapInst.equalsIgnoreCase(""))
                    weightsflag = false;
                ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0], inst.getRlens()[0], inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0], mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0], weightsflag);
                break;
            case COMBINE:
                ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATA_PARTITION:
                ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst, inst.getIv_resultIndices(), outputMatrices, inst.getIv_numReducers(), inst.getIv_replication());
                break;
            default:
                throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
        }
    }// end of try block
    catch (Exception e) {
        throw new DMLRuntimeException(e);
    }
    if (ret.checkReturnStatus()) {
        /*
         * Check if any output is empty. If yes, create a dummy file. Needs
         * to be done only in case of (1) CellOutputInfo and if not CP, or
         * (2) BinaryBlockOutputInfo if not CP and output empty blocks disabled.
         */
        try {
            if (!execCP) {
                for (int i = 0; i < outputMatrices.length; i++) {
                    // get output meta data
                    MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    OutputInfo outinfo = meta.getOutputInfo();
                    String fname = outputMatrices[i].getFileName();
                    if (MapReduceTool.isHDFSFileEmpty(fname)) {
                        // prepare output file
                        Path filepath = new Path(fname, "0-m-00000");
                        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
                        writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock());
                    }
                    outputMatrices[i].setHDFSFileExists(true);
                    if (inst.getJobType() != JobType.CSV_WRITE) {
                        // write out metadata file
                        // Currently, valueType information is not stored in MR instruction,
                        // since only DOUBLE matrices are supported ==> hard coded the value type information for now
                        MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
                    }
                }
            }
            return ret;
        } catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    // should not come here! (job return reported failure without an exception)
    throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}
Aggregations