use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
the class ProgramConverter method cloneInstruction.
/**
 * Creates a copy of an instruction for use under the given id.
 *
 * CP, Spark, MR and GPU instructions are cloned by re-parsing their string
 * representation; MR job instructions are cloned through their copy
 * constructor. Any other instruction type is rejected.
 *
 * @param oInst       instruction to clone
 * @param pid         id appended to {@code CP_CHILD_THREAD} when rewriting names
 * @param plain       if true, function names in function calls are left untouched
 * @param cpFunctions if false, function names in function calls are left untouched
 * @return the cloned instruction after the thread-id replacement
 * @throws DMLRuntimeException if the instruction type is unsupported or cloning fails
 */
public static Instruction cloneInstruction(Instruction oInst, long pid, boolean plain, boolean cpFunctions) {
    String instString = oInst.toString();
    Instruction clone;
    try {
        boolean parseable = oInst instanceof CPInstruction
            || oInst instanceof SPInstruction
            || oInst instanceof MRInstruction
            || oInst instanceof GPUInstruction;
        if (parseable) {
            // Function calls only get a renamed target when function copies are
            // requested and a non-plain clone is asked for; otherwise the
            // original function name is preserved.
            boolean renameFunction = oInst instanceof FunctionCallCPInstruction && cpFunctions && !plain;
            if (renameFunction) {
                FunctionCallCPInstruction fcall = (FunctionCallCPInstruction) oInst;
                String fname = fcall.getFunctionName();
                // Safe replacement because target variables might include the
                // function name. The result is a new string; the original
                // instruction is used only as the basis.
                instString = fcall.updateInstStringFunctionName(fname, fname + CP_CHILD_THREAD + pid);
            }
            clone = InstructionParser.parseSingleInstruction(instString);
        } else if (oInst instanceof MRJobInstruction) {
            // clone via copy constructor
            clone = new MRJobInstruction((MRJobInstruction) oInst);
        } else {
            throw new DMLRuntimeException("Failed to clone instruction: " + oInst);
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // safe replacement of thread id references in the cloned instruction
    return saveReplaceThreadID(clone, ProgramConverter.CP_ROOT_THREAD_ID, ProgramConverter.CP_CHILD_THREAD + pid);
}
use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
the class CostEstimator method maintainMRJobInstVariableStatistics.
/**
 * Updates {@code stats} with the variable statistics implied by one MR job.
 *
 * Steps, in order: (1) register each input variable's stats under its
 * positional index, (2) register the outputs of rand instructions,
 * (3) propagate dimensions through all instructions of the job,
 * (4) create stats for indexes that only exist as intermediates, and
 * (5) register the job's output variables under their names.
 *
 * @param inst  instruction to process; must be an {@code MRJobInstruction}
 * @param stats map from variable name or job-local index (as a string) to stats; modified in place
 */
private void maintainMRJobInstVariableStatistics(Instruction inst, HashMap<String, VarStats> stats) {
    MRJobInstruction jobinst = (MRJobInstruction) inst;

    // input sizes (varname, index mapping)
    String[] inVars = jobinst.getInputVars();
    int index = -1;
    for (String varname : inVars) {
        VarStats vs = stats.get(varname);
        if (vs == null) {
            vs = _unknownStats;
        }
        stats.put(String.valueOf(++index), vs);
    }

    // rand output
    String rdInst = jobinst.getIv_randInstructions();
    if (rdInst != null && rdInst.length() > 0) {
        StringTokenizer st = new StringTokenizer(rdInst, Lop.INSTRUCTION_DELIMITOR);
        // foreach rand instruction
        while (st.hasMoreTokens()) {
            String[] parts = InstructionUtils.getInstructionParts(st.nextToken());
            byte outIndex = Byte.parseByte(parts[2]);
            // dimensions still given as variable placeholders are recorded as -1
            long rlen = parts[3].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[3]);
            long clen = parts[4].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[4]);
            int brlen = Integer.parseInt(parts[5]);
            int bclen = Integer.parseInt(parts[6]);
            double sparsity = Double.parseDouble(parts[9]);
            // A non-zero count derived from a -1 dimension is meaningless, so it
            // is recorded as -1 as well.
            // NOTE(review): assumes -1 marks an unknown nnz, like rlen/clen above - confirm.
            long nnz = (rlen < 0 || clen < 0) ? -1 : (long) (sparsity * rlen * clen);
            VarStats vs = new VarStats(rlen, clen, brlen, bclen, nnz, false);
            stats.put(String.valueOf(outIndex), vs);
        }
    }

    // compute intermediate result indices
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<>();
    // populate input indices (only keys that are integer numbers are index entries)
    for (Entry<String, VarStats> e : stats.entrySet()) {
        if (UtilFunctions.isIntegerNumber(e.getKey())) {
            byte ix = Byte.parseByte(e.getKey());
            VarStats vs = e.getValue();
            if (vs != null) {
                MatrixCharacteristics mc = new MatrixCharacteristics(vs._rlen, vs._clen, vs._brlen, vs._bclen, (long) vs._nnz);
                dims.put(ix, mc);
            }
        }
    }

    // compute dims for all instructions
    String[] instCat = new String[] {
        jobinst.getIv_randInstructions(),
        jobinst.getIv_recordReaderInstructions(),
        jobinst.getIv_instructionsInMapper(),
        jobinst.getIv_shuffleInstructions(),
        jobinst.getIv_aggInstructions(),
        jobinst.getIv_otherInstructions()
    };
    for (String linstCat : instCat) {
        if (linstCat != null && linstCat.length() > 0) {
            String[] linst = linstCat.split(Instruction.INSTRUCTION_DELIM);
            for (String instStr : linst) {
                String instStr2 = replaceInstructionPatch(instStr);
                MRInstruction mrinst = MRInstructionParser.parseSingleInstruction(instStr2);
                MatrixCharacteristics.computeDimension(dims, mrinst);
            }
        }
    }

    // create varstats if necessary
    for (Entry<Byte, MatrixCharacteristics> e : dims.entrySet()) {
        byte ix = e.getKey();
        if (!stats.containsKey(String.valueOf(ix))) {
            MatrixCharacteristics mc = e.getValue();
            VarStats vs = new VarStats(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), mc.getNonZeros(), false);
            stats.put(String.valueOf(ix), vs);
        }
    }

    // map result indexes
    String[] outLabels = jobinst.getOutputVars();
    byte[] resultIndexes = jobinst.getIv_resultIndices();
    for (int i = 0; i < resultIndexes.length; i++) {
        String varname = outLabels[i];
        VarStats varvs = stats.get(String.valueOf(resultIndexes[i]));
        if (varvs == null) {
            varvs = stats.get(varname);
        }
        if (varvs == null) {
            // Neither the result index nor the output label has stats. Use the
            // unknown-stats placeholder, as done for inputs above, instead of
            // failing with a NullPointerException. The shared placeholder is
            // not modified here.
            varvs = _unknownStats;
        } else {
            varvs._inmem = false;
        }
        stats.put(varname, varvs);
    }
}
use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
the class CostEstimator method rGetTimeEstimate.
/**
 * Recursively estimates the execution time of a program block.
 *
 * Control-flow blocks sum the estimates of their children (only when
 * {@code recursive} is set): while loops are scaled by {@code DEFAULT_NUMITER},
 * for loops by {@code getNumIterations}, and if blocks average the if and
 * else branch. Any other block is costed instruction by instruction, and
 * function calls are followed into the called function's block.
 *
 * @param pb        program block to estimate
 * @param stats     variable statistics, updated while instructions are processed
 * @param memoFunc  keys of functions currently being estimated; guards against recursive functions
 * @param recursive whether child blocks of control-flow and function blocks are descended into
 * @return the accumulated time estimate for {@code pb}
 */
private double rGetTimeEstimate(ProgramBlock pb, HashMap<String, VarStats> stats, HashSet<String> memoFunc, boolean recursive) {
    double ret = 0;
    if (pb instanceof WhileProgramBlock) {
        WhileProgramBlock tmp = (WhileProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocks()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
        }
        ret *= DEFAULT_NUMITER;
    } else if (pb instanceof IfProgramBlock) {
        IfProgramBlock tmp = (IfProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocksIfBody()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
            if (tmp.getChildBlocksElseBody() != null) {
                boolean hasElseBlocks = false;
                for (ProgramBlock pb2 : tmp.getChildBlocksElseBody()) {
                    ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
                    hasElseBlocks = true;
                }
                // Weighted sum of the two branches. The division is applied once,
                // after the whole else body is summed. Dividing inside the loop
                // would halve the estimate once per else child block.
                if (hasElseBlocks) {
                    ret /= 2;
                }
            }
        }
    } else if (pb instanceof ForProgramBlock) {
        // includes ParFORProgramBlock
        ForProgramBlock tmp = (ForProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocks()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
        }
        ret *= getNumIterations(stats, tmp);
    } else if (pb instanceof FunctionProgramBlock && !(pb instanceof ExternalFunctionProgramBlock)) {
        // external function blocks fall through to the generic branch below
        FunctionProgramBlock tmp = (FunctionProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocks()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
        }
    } else {
        ArrayList<Instruction> tmp = pb.getInstructions();
        for (Instruction inst : tmp) {
            if (inst instanceof CPInstruction) {
                // CP: obtain stats from createvar, cpvar, rmvar, rand
                maintainCPInstVariableStatistics((CPInstruction) inst, stats);
                // extract statistics (instruction-specific)
                Object[] o = extractCPInstStatistics(inst, stats);
                VarStats[] vs = (VarStats[]) o[0];
                String[] attr = (String[]) o[1];
                // call time estimation for inst
                ret += getCPInstTimeEstimate(inst, vs, attr);
                if (inst instanceof FunctionCallCPInstruction) {
                    // functions
                    FunctionCallCPInstruction finst = (FunctionCallCPInstruction) inst;
                    String fkey = DMLProgram.constructFunctionKey(finst.getNamespace(), finst.getFunctionName());
                    // awareness of recursive functions, missing program
                    if (!memoFunc.contains(fkey) && pb.getProgram() != null) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Begin Function " + fkey);
                        }
                        // mark the function as in progress while its body is estimated
                        memoFunc.add(fkey);
                        Program prog = pb.getProgram();
                        FunctionProgramBlock fpb = prog.getFunctionProgramBlock(finst.getNamespace(), finst.getFunctionName());
                        ret += rGetTimeEstimate(fpb, stats, memoFunc, recursive);
                        memoFunc.remove(fkey);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("End Function " + fkey);
                        }
                    }
                }
            } else if (inst instanceof MRJobInstruction) {
                // MR: obtain stats for job
                maintainMRJobInstVariableStatistics(inst, stats);
                // extract input statistics
                Object[] o = extractMRJobInstStatistics(inst, stats);
                VarStats[] vs = (VarStats[]) o[0];
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Begin MRJob type=" + ((MRJobInstruction) inst).getJobType());
                }
                // call time estimation for complex MR inst
                ret += getMRJobInstTimeEstimate(inst, vs, null);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("End MRJob");
                }
                // cleanup stats for job
                cleanupMRJobVariableStatistics(inst, stats);
            }
        }
    }
    return ret;
}
use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
the class CostEstimator method extractMRJobInstStatistics.
/**
 * Collects the statistics of all job-local indexes of an MR job.
 *
 * The returned stats array has one slot per index from 0 up to the largest
 * result index of the job; indexes without an entry in {@code stats} are
 * filled with {@code _unknownStats}.
 *
 * @param inst  instruction to inspect; must be an {@code MRJobInstruction}
 * @param stats map keyed by index (as a string) and by variable name
 * @return two-element array: {@code [0]} the {@code VarStats[]}, {@code [1]} always {@code null} (no attributes)
 */
private static Object[] extractMRJobInstStatistics(Instruction inst, HashMap<String, VarStats> stats) {
    MRJobInstruction job = (MRJobInstruction) inst;

    // the largest result index determines how many slots are needed
    byte highest = -1;
    for (byte resultIx : job.getIv_resultIndices()) {
        if (resultIx > highest) {
            highest = resultIx;
        }
    }

    // inputs, intermediates, and outputs, addressed by their index
    VarStats[] perIndex = new VarStats[highest + 1];
    for (int slot = 0; slot < perIndex.length; slot++) {
        VarStats known = stats.get(String.valueOf(slot));
        perIndex[slot] = (known != null) ? known : _unknownStats;
    }

    // result layout: stats, attrs
    return new Object[] { perIndex, null };
}
use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
the class ExternalFunctionProgramBlock method getBlock2CellInstructions.
/**
 * Generates the instructions that convert matrix inputs from block to cell
 * by means of a single GMR job.
 *
 * Matrix inputs listed in {@code _skipInReblock} get no conversion
 * instructions; only a file name is registered for them. For every other
 * matrix input a create-variable instruction, one shared GMR job instruction,
 * and a copy/remove pair that renames the job output are produced.
 *
 * @param inputParams list of data identifiers
 * @param unBlockedFileNames map of unblocked file names, filled with one entry per matrix input
 * @return list of instructions, or {@code null} if no matrix input needs conversion
 */
private ArrayList<Instruction> getBlock2CellInstructions(ArrayList<DataIdentifier> inputParams, HashMap<String, String> unBlockedFileNames) {
    // split the matrix inputs into those to convert and those to skip
    ArrayList<DataIdentifier> matrices = new ArrayList<>();
    ArrayList<DataIdentifier> matricesNoReblock = new ArrayList<>();
    for (DataIdentifier param : inputParams) {
        if (!param.getDataType().isMatrix()) {
            continue;
        }
        if (_skipInReblock.contains(param.getName())) {
            matricesNoReblock.add(param);
        } else {
            matrices.add(param);
        }
    }

    ArrayList<Instruction> b2cinst = null;
    if (!matrices.isEmpty()) {
        b2cinst = new ArrayList<>();
        MRJobInstruction gmrInst = new MRJobInstruction(JobType.GMR);
        TreeMap<Integer, ArrayList<String>> MRJobLineNumbers = null;
        if (DMLScript.ENABLE_DEBUG_MODE) {
            MRJobLineNumbers = new TreeMap<>();
        }
        ArrayList<String> inLabels = new ArrayList<>();
        ArrayList<String> outLabels = new ArrayList<>();
        final int numMatrices = matrices.size();
        String[] outputs = new String[numMatrices];
        byte[] resultIndex = new byte[numMatrices];
        String scratchSpaceLoc = ConfigurationManager.getScratchSpace();
        try {
            // one GMR job transforms each of these matrices from block to cell
            for (int i = 0; i < numMatrices; i++) {
                DataIdentifier matrix = matrices.get(i);
                String inName = matrix.getName();
                String outName = inName + "_extFnInput";
                inLabels.add(inName);
                outLabels.add(outName);
                resultIndex[i] = (byte) i;
                outputs[i] = scratchSpaceLoc + Lop.FILE_SEPARATOR + Lop.PROCESS_PREFIX + DMLScript.getUUID()
                    + Lop.FILE_SEPARATOR + _otherParams.get(ExternalFunctionStatement.CLASS_NAME) + _runID + "_" + i + "Input";
                unBlockedFileNames.put(inName, outputs[i]);

                if (DMLScript.ENABLE_DEBUG_MODE) {
                    // dummy gmr instruction string (FOR DEBUGGER)
                    String gmrStr = "MR" + Lop.OPERAND_DELIMITOR + "gmr"
                        + Lop.OPERAND_DELIMITOR + i + Lop.DATATYPE_PREFIX + matrix.getDataType() + Lop.VALUETYPE_PREFIX + matrix.getValueType()
                        + Lop.OPERAND_DELIMITOR + i + Lop.DATATYPE_PREFIX + matrix.getDataType() + Lop.VALUETYPE_PREFIX + matrix.getValueType()
                        + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize()
                        + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize();
                    // group the dummy instructions by source line (FOR DEBUGGER)
                    ArrayList<String> sameLine = MRJobLineNumbers.get(matrix.getBeginLine());
                    if (sameLine == null) {
                        sameLine = new ArrayList<String>();
                        MRJobLineNumbers.put(matrix.getBeginLine(), sameLine);
                    }
                    sameLine.add(gmrStr);
                }

                // metadata instruction that populates the symbol table with
                // the variable holding the unblocked matrix
                Instruction createInst = VariableCPInstruction.prepareCreateMatrixVariableInstruction(
                    outName, outputs[i], false, OutputInfo.outputInfoToString(OutputInfo.TextCellOutputInfo));
                createInst.setLocation(matrix);
                b2cinst.add(createInst);
            }

            // the GMR instruction that performs the block2cell conversion
            String[] inArr = inLabels.toArray(new String[inLabels.size()]);
            String[] outArr = outLabels.toArray(new String[outLabels.size()]);
            gmrInst.setGMRInstructions(inArr, "", "", "", "", outArr, resultIndex, 0, 1);
            b2cinst.add(gmrInst);

            // rename the output variables of the GMR job back to the input names
            for (int i = 0; i < numMatrices; i++) {
                DataIdentifier matrix = matrices.get(i);
                String outName = outLabels.get(i);
                Instruction cpInst = VariableCPInstruction.prepareCopyInstruction(outName, matrix.getName());
                Instruction rmInst = VariableCPInstruction.prepareRemoveInstruction(outName);
                cpInst.setLocation(matrix);
                rmInst.setLocation(matrix);
                b2cinst.add(cpInst);
                b2cinst.add(rmInst);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        // LOG instructions
        if (LOG.isTraceEnabled()) {
            LOG.trace("\n--- Block-2-Cell Instructions ---");
            for (Instruction generated : b2cinst) {
                LOG.trace(generated.toString());
            }
            LOG.trace("----------------------------------");
        }
    }

    // BEGIN FUNCTION PATCH
    // Skipped matrices only get a file name registered.
    // NOTE(review): the counter restarts at 0 here, so a name can coincide with
    // one generated for a converted matrix above - confirm this is intended.
    for (int k = 0; k < matricesNoReblock.size(); k++) {
        String scratchSpaceLoc = ConfigurationManager.getScratchSpace();
        String filename = scratchSpaceLoc + Lop.FILE_SEPARATOR + Lop.PROCESS_PREFIX + DMLScript.getUUID()
            + Lop.FILE_SEPARATOR + _otherParams.get(ExternalFunctionStatement.CLASS_NAME) + _runID + "_" + k + "Input";
        unBlockedFileNames.put(matricesNoReblock.get(k).getName(), filename);
    }

    // null if no matrix input required conversion
    return b2cinst;
}
Aggregations