Use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
The class CostEstimator, method cleanupMRJobVariableStatistics.
private static void cleanupMRJobVariableStatistics(Instruction inst, HashMap<String, VarStats> stats) {
MRJobInstruction jinst = (MRJobInstruction) inst;
// determine the maximum result index
byte[] indexes = jinst.getIv_resultIndices();
byte maxIx = -1;
for (int i = 0; i < indexes.length; i++)
    if (maxIx < indexes[i])
        maxIx = indexes[i];
// remove all stats up to max index
for (int i = 0; i <= maxIx; i++) {
VarStats tmp = stats.remove(String.valueOf(i));
if (tmp != null)
    // all MR job outputs reside on HDFS
    tmp._inmem = false;
}
}
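A minimal, self-contained sketch of the same cleanup pattern: scan for the maximum result index, then evict all statistics entries up to that index. VarStats is reduced to a hypothetical stub here; only the map manipulation mirrors the method above.
import java.util.HashMap;

public class CleanupSketch {
    // hypothetical stand-in for the real VarStats class
    static class VarStats {
        boolean _inmem = true;
    }

    public static void main(String[] args) {
        HashMap<String, VarStats> stats = new HashMap<>();
        for (int i = 0; i < 5; i++)
            stats.put(String.valueOf(i), new VarStats());
        byte[] resultIndexes = { 1, 3 }; // as returned by getIv_resultIndices()
        // determine the maximum result index
        byte maxIx = -1;
        for (byte ix : resultIndexes)
            if (maxIx < ix)
                maxIx = ix;
        // evict all statistics up to the max index; MR job outputs reside on HDFS
        for (int i = 0; i <= maxIx; i++) {
            VarStats tmp = stats.remove(String.valueOf(i));
            if (tmp != null)
                tmp._inmem = false;
        }
        System.out.println("remaining: " + stats.keySet()); // prints [4]
    }
}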
Use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
The class CostEstimatorStaticRuntime, method getMRJobInstTimeEstimate.
@Override
protected double getMRJobInstTimeEstimate(Instruction inst, VarStats[] vs, String[] args) {
MRJobInstruction jinst = (MRJobInstruction) inst;
// infrastructure properties
boolean localJob = InfrastructureAnalyzer.isLocalMode();
int maxPMap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
int maxPRed = Math.min(InfrastructureAnalyzer.getRemoteParallelReduceTasks(), ConfigurationManager.getNumReducers());
double blocksize = ((double) InfrastructureAnalyzer.getHDFSBlockSize()) / (1024 * 1024);
// correct the max number of mappers/reducers on yarn clusters
if (InfrastructureAnalyzer.isYarnEnabled()) {
maxPMap = (int) Math.max(maxPMap, YarnClusterAnalyzer.getNumCores());
// artificially reduced by a factor of 2 in order to prefer map-side processing even with a smaller degree of parallelism
maxPRed = (int) Math.max(maxPRed, YarnClusterAnalyzer.getNumCores() / 2 / 2);
}
// yarn-specific: take degree of parallelism into account
if (jinst instanceof MRJobResourceInstruction) {
int maxTasks = (int) ((MRJobResourceInstruction) jinst).getMaxMRTasks();
maxPMap = Math.min(maxPMap, maxTasks);
maxPRed = Math.min(maxPRed, maxTasks);
}
// job properties
boolean mapOnly = jinst.isMapOnly();
String rdInst = jinst.getIv_randInstructions();
String rrInst = jinst.getIv_recordReaderInstructions();
String mapInst = jinst.getIv_instructionsInMapper();
String shfInst = jinst.getIv_shuffleInstructions();
String aggInst = jinst.getIv_aggInstructions();
String otherInst = jinst.getIv_otherInstructions();
byte[] inIx = getInputIndexes(jinst.getInputVars());
byte[] retIx = jinst.getIv_resultIndices();
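// indexes of intermediates that are passed from the map phase into the shuffle/reduce phase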
byte[] mapOutIx = getMapOutputIndexes(inIx, retIx, rdInst, mapInst, shfInst, aggInst, otherInst);
int numMap = computeNumMapTasks(vs, inIx, blocksize, maxPMap, jinst.getJobType());
int numPMap = Math.min(numMap, maxPMap);
// effective map dop
int numEPMap = Math.max(Math.min(numMap, maxPMap / 2), 1);
int numRed = computeNumReduceTasks(vs, mapOutIx, jinst.getJobType());
int numPRed = Math.min(numRed, maxPRed);
// effective reduce dop
int numEPRed = Math.max(Math.min(numRed, maxPRed / 2), 1);
LOG.debug("Meta nmap = " + numMap + ", nred = " + numRed + "; npmap = " + numPMap + ", npred = " + numPRed + "; nepmap = " + numEPMap + ", nepred = " + numEPRed);
// step 0: export costs if inputs reside in memory
double exportCosts = 0;
for (int i = 0; i < jinst.getInputVars().length; i++)
    if (vs[i]._inmem)
        exportCosts += getHDFSWriteTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
// step 1: MR job / task latency (normalization by effective dop)
double jobLatencyCosts = localJob ? DEFAULT_MR_JOB_LATENCY_LOCAL : DEFAULT_MR_JOB_LATENCY_REMOTE;
double taskLatencyCost = (numMap / numEPMap + numEPRed) * (localJob ? DEFAULT_MR_TASK_LATENCY_LOCAL : DEFAULT_MR_TASK_LATENCY_REMOTE);
double latencyCosts = jobLatencyCosts + taskLatencyCost;
// step 2: parallel read of inputs (normalization by effective dop)
double hdfsReadCosts = 0;
for (int i = 0; i < jinst.getInputVars().length; i++) hdfsReadCosts += getHDFSReadTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
hdfsReadCosts /= numEPMap;
// step 3: parallel MR instructions
String[] mapperInst = new String[] { rdInst, rrInst, mapInst };
String[] reducerInst = new String[] { shfInst, aggInst, otherInst };
// map instructions compute/distcache read (normalization by effective dop)
// read through distributed cache
double mapDCReadCost = 0;
// map compute cost
double mapCosts = 0;
double shuffleCosts = 0;
// reduce compute costs
double reduceCosts = 0;
for (String instCat : mapperInst) if (instCat != null && instCat.length() > 0) {
String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
for (String tmp : linst) {
// map compute costs
Object[] o = extractMRInstStatistics(tmp, vs);
String opcode = InstructionUtils.getOpCode(tmp);
mapCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
// dist cache read costs
int dcIndex = getDistcacheIndex(tmp);
if (dcIndex >= 0) {
mapDCReadCost += Math.min(
    getFSReadTime(vs[dcIndex]._rlen, vs[dcIndex]._clen, vs[dcIndex].getSparsity()),
    getFSReadTime(DistributedCacheInput.PARTITION_SIZE, 1, 1.0)) // 32MB partitions
    * numMap; // read in each task
}
}
}
mapCosts /= numEPMap;
mapDCReadCost /= numEPMap;
if (!mapOnly) {
// shuffle costs (normalization by effective map/reduce dop)
for (int i = 0; i < mapOutIx.length; i++) {
shuffleCosts += getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
    + getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) * 4 / numEPRed
    + getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed;
// TODO this is a workaround - we need to address the number of map output blocks in a more systematic way
for (String instCat : reducerInst) if (instCat != null && instCat.length() > 0) {
String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
for (String tmp : linst) {
if (InstructionUtils.getMRType(tmp) == MRType.Aggregate)
shuffleCosts += numMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
    + numPMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
    + numPMap * getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed;
}
}
}
// reduce instructions compute (normalization by effective dop)
for (String instCat : reducerInst) if (instCat != null && instCat.length() > 0) {
String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
for (String tmp : linst) {
Object[] o = extractMRInstStatistics(tmp, vs);
if (InstructionUtils.getMRType(tmp) == MRType.Aggregate)
o[1] = new String[] { String.valueOf(numMap) };
String opcode = InstructionUtils.getOpCode(tmp);
reduceCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
}
}
reduceCosts /= numEPRed;
}
// step 4: parallel write of outputs (normalization by effective dop)
double hdfsWriteCosts = 0;
for (int i = 0; i < jinst.getOutputVars().length; i++) {
hdfsWriteCosts += getHDFSWriteTime(vs[retIx[i]]._rlen, vs[retIx[i]]._clen, vs[retIx[i]].getSparsity());
}
hdfsWriteCosts /= ((mapOnly) ? numEPMap : numEPRed);
// debug output
if (LOG.isDebugEnabled()) {
LOG.debug("Costs Export = " + exportCosts);
LOG.debug("Costs Latency = " + latencyCosts);
LOG.debug("Costs HDFS Read = " + hdfsReadCosts);
LOG.debug("Costs Distcache Read = " + mapDCReadCost);
LOG.debug("Costs Map Exec = " + mapCosts);
LOG.debug("Costs Shuffle = " + shuffleCosts);
LOG.debug("Costs Reduce Exec = " + reduceCosts);
LOG.debug("Costs HDFS Write = " + hdfsWriteCosts);
}
// aggregate individual cost factors
return exportCosts + latencyCosts + hdfsReadCosts + mapCosts + mapDCReadCost + shuffleCosts + reduceCosts + hdfsWriteCosts;
}
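The method's result is purely additive: each cost component is computed in isolation and normalized by the effective degree of parallelism. A toy sketch of that structure, with made-up constants standing in for the real read/write/compute estimators:
// Hypothetical constants and counts, mirroring the additive cost model above.
public class MRCostSketch {
    static final double JOB_LATENCY = 10.0; // assumed seconds per MR job
    static final double TASK_LATENCY = 0.5; // assumed seconds per task wave

    public static void main(String[] args) {
        int numMap = 40, maxPMap = 20, numRed = 8, maxPRed = 10;
        // effective dop is intentionally half the physical slots
        int numEPMap = Math.max(Math.min(numMap, maxPMap / 2), 1);
        int numEPRed = Math.max(Math.min(numRed, maxPRed / 2), 1);
        // same latency structure as step 1 above
        double latency = JOB_LATENCY + (numMap / numEPMap + numEPRed) * TASK_LATENCY;
        double read = 120.0 / numEPMap;    // assumed total HDFS read seconds
        double compute = 300.0 / numEPMap; // assumed total map compute seconds
        double write = 60.0 / numEPRed;    // assumed total HDFS write seconds
        // individual cost factors simply add up to the job estimate
        System.out.println("estimated time = " + (latency + read + compute + write));
    }
}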
Use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
The class Dag, method generateMapReduceInstructions.
/**
* Method to generate MapReduce job instructions from a given set of nodes.
*
* @param execNodes list of exec nodes
* @param inst list of instructions
* @param writeinst list of write instructions
* @param deleteinst list of delete instructions
* @param rmvarinst list of rmvar instructions
* @param jt job type
*/
private void generateMapReduceInstructions(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeinst, ArrayList<Instruction> deleteinst, ArrayList<Instruction> rmvarinst, JobType jt) {
ArrayList<Byte> resultIndices = new ArrayList<>();
ArrayList<String> inputs = new ArrayList<>();
ArrayList<String> outputs = new ArrayList<>();
ArrayList<InputInfo> inputInfos = new ArrayList<>();
ArrayList<OutputInfo> outputInfos = new ArrayList<>();
ArrayList<Long> numRows = new ArrayList<>();
ArrayList<Long> numCols = new ArrayList<>();
ArrayList<Long> numRowsPerBlock = new ArrayList<>();
ArrayList<Long> numColsPerBlock = new ArrayList<>();
ArrayList<String> mapperInstructions = new ArrayList<>();
ArrayList<String> randInstructions = new ArrayList<>();
ArrayList<String> recordReaderInstructions = new ArrayList<>();
int numReducers = 0;
int replication = 1;
ArrayList<String> inputLabels = new ArrayList<>();
ArrayList<String> outputLabels = new ArrayList<>();
ArrayList<Instruction> renameInstructions = new ArrayList<>();
ArrayList<Instruction> variableInstructions = new ArrayList<>();
ArrayList<Instruction> postInstructions = new ArrayList<>();
ArrayList<Integer> MRJobLineNumbers = null;
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers = new ArrayList<>();
}
ArrayList<Lop> inputLops = new ArrayList<>();
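// when set, forces the job to operate on the cell rather than the block representation (determined below from input formats and job type)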
boolean cellModeOverride = false;
/* Find the nodes that produce an output */
ArrayList<Lop> rootNodes = new ArrayList<>();
getOutputNodes(execNodes, rootNodes, jt);
if (LOG.isTraceEnabled())
LOG.trace("# of root nodes = " + rootNodes.size());
/* Remove transient writes that are simple copy of transient reads */
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
ArrayList<Lop> markedNodes = new ArrayList<>();
// only keep data nodes that are results of some computation.
for (Lop rnode : rootNodes) {
if (rnode.getExecLocation() == ExecLocation.Data && ((Data) rnode).isTransient() && ((Data) rnode).getOperationType() == OperationTypes.WRITE && ((Data) rnode).getDataType() == DataType.MATRIX) {
// no computation, just a copy
if (rnode.getInputs().get(0).getExecLocation() == ExecLocation.Data && ((Data) rnode.getInputs().get(0)).isTransient() && rnode.getOutputParameters().getLabel().equals(rnode.getInputs().get(0).getOutputParameters().getLabel())) {
markedNodes.add(rnode);
}
}
}
// delete marked nodes
rootNodes.removeAll(markedNodes);
markedNodes.clear();
if (rootNodes.isEmpty())
return;
}
// structure that maps nodes to the indices that will be used in the instructions
HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
for (Lop rnode : rootNodes) {
getInputPathsAndParameters(rnode, execNodes, inputs, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
}
// In case of RAND job, instructions are defined in the input file
if (jt == JobType.DATAGEN)
randInstructions = inputs;
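// single-element array used as a mutable counter: operand indices for generated instructions continue past the inputs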
int[] start_index = new int[1];
start_index[0] = inputs.size();
// currently, recordreader instructions are allowed only in GMR jobs
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
for (Lop rnode : rootNodes) {
getRecordReaderInstructions(rnode, execNodes, inputs, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (recordReaderInstructions.size() > 1)
throw new LopsException("MapReduce job can only have a single recordreader instruction: " + recordReaderInstructions.toString());
}
}
// determine whether cell-mode processing must be forced based on the input formats and job type
if (jt != JobType.REBLOCK && jt != JobType.CSV_REBLOCK && jt != JobType.DATAGEN) {
for (int i = 0; i < inputInfos.size(); i++) if (inputInfos.get(i) == InputInfo.BinaryCellInputInfo || inputInfos.get(i) == InputInfo.TextCellInputInfo)
cellModeOverride = true;
}
if (!recordReaderInstructions.isEmpty() || jt == JobType.GROUPED_AGG)
cellModeOverride = true;
for (int i = 0; i < rootNodes.size(); i++) {
getMapperInstructions(rootNodes.get(i), execNodes, inputs, mapperInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Input strings: " + inputs.toString());
if (jt == JobType.DATAGEN)
LOG.trace(" Rand instructions: " + getCSVString(randInstructions));
if (jt == JobType.GMR)
LOG.trace(" RecordReader instructions: " + getCSVString(recordReaderInstructions));
LOG.trace(" Mapper instructions: " + getCSVString(mapperInstructions));
}
/* Get Shuffle and Reducer Instructions */
ArrayList<String> shuffleInstructions = new ArrayList<>();
ArrayList<String> aggInstructionsReducer = new ArrayList<>();
ArrayList<String> otherInstructionsReducer = new ArrayList<>();
for (Lop rn : rootNodes) {
int resultIndex = getAggAndOtherInstructions(rn, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (resultIndex == -1)
throw new LopsException("Unexpected error in piggybacking!");
if (rn.getExecLocation() == ExecLocation.Data && ((Data) rn).getOperationType() == Data.OperationTypes.WRITE && ((Data) rn).isTransient() && rootNodes.contains(rn.getInputs().get(0))) {
// Both rn (a transient write) and its input are root nodes.
// Instead of creating two copies of the data, simply generate a cpvar instruction
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, true);
writeinst.addAll(out.getLastInstructions());
} else {
resultIndices.add(Byte.valueOf((byte) resultIndex));
// setup output filenames and outputInfos and generate related instructions
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, false);
outputLabels.add(out.getVarName());
outputs.add(out.getFileName());
outputInfos.add(out.getOutInfo());
if (LOG.isTraceEnabled()) {
LOG.trace(" Output Info: " + out.getFileName() + ";" + OutputInfo.outputInfoToString(out.getOutInfo()) + ";" + out.getVarName());
}
renameInstructions.addAll(out.getLastInstructions());
variableInstructions.addAll(out.getPreInstructions());
postInstructions.addAll(out.getPostInstructions());
}
}
/* Convert the result indices into a byte array */
byte[] resultIndicesByte = new byte[resultIndices.size()];
for (int i = 0; i < resultIndicesByte.length; i++) {
resultIndicesByte[i] = resultIndices.get(i).byteValue();
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Shuffle Instructions: " + getCSVString(shuffleInstructions));
LOG.trace(" Aggregate Instructions: " + getCSVString(aggInstructionsReducer));
LOG.trace(" Other instructions =" + getCSVString(otherInstructionsReducer));
LOG.trace(" Output strings: " + outputs.toString());
LOG.trace(" ResultIndices = " + resultIndices.toString());
}
/* Prepare the MapReduce job instruction */
MRJobInstruction mr = new MRJobInstruction(jt);
// check if this is a map-only job. If not, set the number of reducers
if (!shuffleInstructions.isEmpty() || !aggInstructionsReducer.isEmpty() || !otherInstructionsReducer.isEmpty())
numReducers = total_reducers;
// set inputs, outputs, and other properties for the job
mr.setInputOutputLabels(inputLabels.toArray(new String[0]), outputLabels.toArray(new String[0]));
mr.setOutputs(resultIndicesByte);
mr.setDimsUnknownFilePrefix(getFilePath());
mr.setNumberOfReducers(numReducers);
mr.setReplication(replication);
// set instructions for recordReader and mapper
mr.setRecordReaderInstructions(getCSVString(recordReaderInstructions));
mr.setMapperInstructions(getCSVString(mapperInstructions));
// compute and set mapper memory requirements (for consistency of runtime piggybacking)
if (jt == JobType.GMR) {
double mem = 0;
for (Lop n : execNodes) mem += computeFootprintInMapper(n);
mr.setMemoryRequirements(mem);
}
if (jt == JobType.DATAGEN)
mr.setRandInstructions(getCSVString(randInstructions));
// set shuffle instructions
mr.setShuffleInstructions(getCSVString(shuffleInstructions));
// set reducer instructions
mr.setAggregateInstructionsInReducer(getCSVString(aggInstructionsReducer));
mr.setOtherInstructionsInReducer(getCSVString(otherInstructionsReducer));
if (DMLScript.ENABLE_DEBUG_MODE) {
// set line number information for each MR instruction
mr.setMRJobInstructionsLineNumbers(MRJobLineNumbers);
}
/* Add the prepared instructions to output set */
inst.addAll(variableInstructions);
inst.add(mr);
inst.addAll(postInstructions);
deleteinst.addAll(renameInstructions);
for (Lop l : inputLops) {
if (DMLScript.ENABLE_DEBUG_MODE) {
processConsumers(l, rmvarinst, deleteinst, l);
} else {
processConsumers(l, rmvarinst, deleteinst, null);
}
}
}
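Stripped of the piggybacking passes, the assembly reduces to populating a single MRJobInstruction from per-phase instruction strings. A condensed sketch using only the setters that appear above; the labels and empty instruction strings are placeholders, and the import paths are assumed:
import org.apache.sysml.lops.compile.JobType; // package assumed
import org.apache.sysml.runtime.instructions.MRJobInstruction;

public class MRJobAssemblySketch {
    public static void main(String[] args) {
        String[] inputLabels = { "A" }; // placeholder variable names
        String[] outputLabels = { "B" };
        byte[] resultIndices = { 1 };
        MRJobInstruction mr = new MRJobInstruction(JobType.GMR);
        mr.setInputOutputLabels(inputLabels, outputLabels);
        mr.setOutputs(resultIndices);
        mr.setNumberOfReducers(0); // zero reducers: a map-only job
        mr.setReplication(1);
        mr.setMapperInstructions(""); // CSV strings of piggybacked instructions
        mr.setShuffleInstructions("");
        mr.setAggregateInstructionsInReducer("");
        mr.setOtherInstructionsInReducer("");
    }
}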
Use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
The class DMLDebuggerProgramInfo, method accesBreakpointInstruction.
/**
* Access the breakpoint instruction at the specified line number in a set of instructions (if valid)
* @param instructions Instructions for current program block
* @param lineNumber Location for inserting breakpoint
* @param op Breakpoint operation
* @param status Current breakpoint status
*/
private void accesBreakpointInstruction(ArrayList<Instruction> instructions, int lineNumber, int op, BPINSTRUCTION_STATUS status) {
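// op semantics: 0 = insert a new breakpoint (or re-enable an existing one), 1 = update the breakpoint status, any other value = remove the breakpoint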
for (int i = 0; i < instructions.size(); i++) {
Instruction currInst = instructions.get(i);
if (op == 0) {
if (currInst instanceof MRJobInstruction) {
MRJobInstruction currMRInst = (MRJobInstruction) currInst;
// Check if the current instruction's line number corresponds to the breakpoint line number
if (currMRInst.findMRInstructions(lineNumber)) {
BreakPointInstruction breakpoint = new BreakPointInstruction();
breakpoint.setLocation(currInst);
breakpoint.setInstID(instID++);
breakpoint.setBPInstructionLocation(location);
instructions.add(i, breakpoint);
DMLBreakpointManager.insertBreakpoint(breakpoint, lineNumber);
return;
}
} else if (currInst instanceof CPInstruction || currInst instanceof SPInstruction) {
// Check if the current instruction's line number corresponds to the breakpoint line number
if (currInst.getLineNum() == lineNumber) {
BreakPointInstruction breakpoint = new BreakPointInstruction();
breakpoint.setLocation(currInst);
breakpoint.setInstID(instID++);
breakpoint.setBPInstructionLocation(location);
instructions.add(i, breakpoint);
DMLBreakpointManager.insertBreakpoint(breakpoint, lineNumber);
return;
}
} else if (currInst instanceof BreakPointInstruction && currInst.getLineNum() == lineNumber) {
BreakPointInstruction breakpoint = (BreakPointInstruction) currInst;
breakpoint.setBPInstructionStatus(BPINSTRUCTION_STATUS.ENABLED);
breakpoint.setBPInstructionLocation(location);
instructions.set(i, breakpoint);
DMLBreakpointManager.updateBreakpoint(lineNumber, status);
return;
}
} else {
// Check if the current instruction's line number corresponds to the breakpoint line number
if (currInst instanceof BreakPointInstruction && currInst.getLineNum() == lineNumber) {
if (op == 1) {
BreakPointInstruction breakpoint = (BreakPointInstruction) currInst;
breakpoint.setLocation(currInst);
breakpoint.setInstID(currInst.getInstID());
breakpoint.setBPInstructionStatus(status);
breakpoint.setBPInstructionLocation(location);
instructions.set(i, breakpoint);
DMLBreakpointManager.updateBreakpoint(lineNumber, status);
} else {
instructions.remove(i);
DMLBreakpointManager.removeBreakpoint(lineNumber, status);
}
return;
}
}
}
}
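The insertion itself is plain list surgery: the breakpoint is added at the current position so that it executes before the matched instruction. A stripped-down sketch with hypothetical stand-in types:
import java.util.ArrayList;

public class BreakpointInsertSketch {
    // minimal stand-ins for the real instruction classes
    static class Instruction {
        int lineNum;
        Instruction(int l) { lineNum = l; }
    }
    static class BreakPointInstruction extends Instruction {
        BreakPointInstruction() { super(-1); }
    }

    public static void main(String[] args) {
        ArrayList<Instruction> instructions = new ArrayList<>();
        instructions.add(new Instruction(3));
        instructions.add(new Instruction(7));
        int breakpointLine = 7;
        for (int i = 0; i < instructions.size(); i++) {
            if (instructions.get(i).lineNum == breakpointLine) {
                // insert at index i so the breakpoint runs before the target instruction
                instructions.add(i, new BreakPointInstruction());
                break;
            }
        }
        System.out.println("instruction count: " + instructions.size()); // prints 3
    }
}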
Use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.
The class ExternalFunctionProgramBlock, method getCell2BlockInstructions.
/**
* Method to generate a reblock job to convert the cell representation into block representation
*
* @param outputParams list of output data identifiers
* @param blockedFileNames map of blocked file names
* @return list of instructions
*/
private ArrayList<Instruction> getCell2BlockInstructions(ArrayList<DataIdentifier> outputParams, HashMap<String, String> blockedFileNames) {
ArrayList<Instruction> c2binst = null;
// list of matrices that need to be reblocked
ArrayList<DataIdentifier> matrices = new ArrayList<>();
ArrayList<DataIdentifier> matricesNoReblock = new ArrayList<>();
// identify outputs that are matrices
for (int i = 0; i < outputParams.size(); i++) {
if (outputParams.get(i).getDataType().isMatrix()) {
if (_skipOutReblock.contains(outputParams.get(i).getName()))
matricesNoReblock.add(outputParams.get(i));
else
matrices.add(outputParams.get(i));
}
}
if (!matrices.isEmpty()) {
c2binst = new ArrayList<>();
MRJobInstruction reblkInst = new MRJobInstruction(JobType.REBLOCK);
TreeMap<Integer, ArrayList<String>> MRJobLineNumbers = null;
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers = new TreeMap<>();
}
ArrayList<String> inLabels = new ArrayList<>();
ArrayList<String> outLabels = new ArrayList<>();
String[] outputs = new String[matrices.size()];
byte[] resultIndex = new byte[matrices.size()];
String reblock = "";
// Keep a copy of a single MR reblock instruction
String reblockStr = "";
String scratchSpaceLoc = ConfigurationManager.getScratchSpace();
try {
// create a RBLK job that transforms each output matrix from cell to block
for (int i = 0; i < matrices.size(); i++) {
inLabels.add(matrices.get(i).getName());
outLabels.add(matrices.get(i).getName() + "_extFnOutput");
outputs[i] = scratchSpaceLoc + Lop.FILE_SEPARATOR + Lop.PROCESS_PREFIX + DMLScript.getUUID() + Lop.FILE_SEPARATOR + _otherParams.get(ExternalFunctionStatement.CLASS_NAME) + _runID + "_" + i + "Output";
blockedFileNames.put(matrices.get(i).getName(), outputs[i]);
resultIndex[i] = (byte) i;
if (i > 0)
reblock += Lop.INSTRUCTION_DELIMITOR;
reblock += "MR" + Lop.OPERAND_DELIMITOR + "rblk" + Lop.OPERAND_DELIMITOR + i + Lop.DATATYPE_PREFIX + matrices.get(i).getDataType() + Lop.VALUETYPE_PREFIX + matrices.get(i).getValueType() + Lop.OPERAND_DELIMITOR + i + Lop.DATATYPE_PREFIX + matrices.get(i).getDataType() + Lop.VALUETYPE_PREFIX + matrices.get(i).getValueType() + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize() + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize() + Lop.OPERAND_DELIMITOR + "true";
if (DMLScript.ENABLE_DEBUG_MODE) {
// Create a copy of reblock instruction but as a single instruction (FOR DEBUGGER)
reblockStr = "MR" + Lop.OPERAND_DELIMITOR + "rblk" + Lop.OPERAND_DELIMITOR + i + Lop.DATATYPE_PREFIX + matrices.get(i).getDataType() + Lop.VALUETYPE_PREFIX + matrices.get(i).getValueType() + Lop.OPERAND_DELIMITOR + i + Lop.DATATYPE_PREFIX + matrices.get(i).getDataType() + Lop.VALUETYPE_PREFIX + matrices.get(i).getValueType() + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize() + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize() + Lop.OPERAND_DELIMITOR + "true";
// Set MR reblock instruction line number (FOR DEBUGGER)
if (!MRJobLineNumbers.containsKey(matrices.get(i).getBeginLine())) {
MRJobLineNumbers.put(matrices.get(i).getBeginLine(), new ArrayList<String>());
}
MRJobLineNumbers.get(matrices.get(i).getBeginLine()).add(reblockStr);
}
// create metadata instructions to populate symbol table
// with variables that hold blocked matrices
Instruction createInst = VariableCPInstruction.prepareCreateMatrixVariableInstruction(outLabels.get(i), outputs[i], false, OutputInfo.outputInfoToString(OutputInfo.BinaryBlockOutputInfo));
createInst.setLocation(matrices.get(i));
c2binst.add(createInst);
}
reblkInst.setReBlockInstructions(inLabels.toArray(new String[inLabels.size()]), "", reblock, "", outLabels.toArray(new String[inLabels.size()]), resultIndex, 1, 1);
c2binst.add(reblkInst);
// generate instructions that rename the output variables of REBLOCK job
Instruction cpInst = null, rmInst = null;
for (int i = 0; i < matrices.size(); i++) {
cpInst = VariableCPInstruction.prepareCopyInstruction(outLabels.get(i), matrices.get(i).getName());
rmInst = VariableCPInstruction.prepareRemoveInstruction(outLabels.get(i));
cpInst.setLocation(matrices.get(i));
rmInst.setLocation(matrices.get(i));
c2binst.add(cpInst);
c2binst.add(rmInst);
// c2binst.add(CPInstructionParser.parseSingleInstruction("CP" + Lops.OPERAND_DELIMITOR + "cpvar"+Lops.OPERAND_DELIMITOR+ outLabels.get(i) + Lops.OPERAND_DELIMITOR + matrices.get(i).getName()));
}
} catch (Exception e) {
throw new RuntimeException(this.printBlockErrorLocation() + "error generating instructions", e);
}
// LOGGING instructions
if (LOG.isTraceEnabled()) {
LOG.trace("\n--- Cell-2-Block Instructions ---");
for (Instruction i : c2binst) {
LOG.trace(i.toString());
}
LOG.trace("----------------------------------");
}
}
// null if no output matrices
return c2binst;
}
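The core of the method is the delimiter-based assembly of the rblk instruction string. A toy sketch of that string building; the delimiter values are hypothetical stand-ins for the constants on org.apache.sysml.lops.Lop:
public class ReblockStringSketch {
    // hypothetical delimiter values; the real ones are constants on Lop
    static final String INSTRUCTION_DELIM = ",";
    static final String OPERAND_DELIM = " ";
    static final String DATATYPE_PREFIX = ".";
    static final String VALUETYPE_PREFIX = ".";

    // one rblk instruction: input operand, output operand, block sizes, empty-block flag
    static String rblk(int index, int blocksize) {
        String operand = index + DATATYPE_PREFIX + "MATRIX" + VALUETYPE_PREFIX + "DOUBLE";
        return "MR" + OPERAND_DELIM + "rblk" + OPERAND_DELIM + operand + OPERAND_DELIM
                + operand + OPERAND_DELIM + blocksize + OPERAND_DELIM + blocksize
                + OPERAND_DELIM + "true";
    }

    public static void main(String[] args) {
        // join one rblk instruction per output matrix, as in the loop above
        StringBuilder reblock = new StringBuilder();
        for (int i = 0; i < 2; i++) {
            if (i > 0)
                reblock.append(INSTRUCTION_DELIM);
            reblock.append(rblk(i, 1000));
        }
        System.out.println(reblock);
    }
}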