Use of org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer in the project incubator-systemml by Apache.
Method setupDistributedCache of the class GMR.
/**
 * Registers the inputs that the given instructions read through the distributed cache
 * with the job configuration.
 *
 * Map and reduce instructions are combined into one delimited string. If that string is
 * null/blank or does not use the distributed cache, nothing is configured.
 *
 * @param job job configuration passed to the distributed cache setup
 * @param instMap mapper instructions; may be null or blank
 * @param instRed reducer instructions; may be null or blank
 * @param inputs input paths, addressed by the byte indexes the instructions report
 * @param rlens not read by this method
 * @param clens not read by this method
 * @return true only if distributed cache inputs were configured and the job runs in
 *         local mode (in which case the in-memory cache has been reset); false otherwise
 * @throws DMLRuntimeException propagated from the calls made while parsing the
 *         instructions and configuring the job
 */
private static boolean setupDistributedCache(JobConf job, String instMap, String instRed, String[] inputs, long[] rlens, long[] clens) throws DMLRuntimeException {
	//merge mapper and reducer instructions into a single instruction string
	boolean hasMap = instMap != null && !instMap.trim().isEmpty();
	boolean hasRed = instRed != null && !instRed.trim().isEmpty();
	String combined;
	if (!hasMap)
		combined = instRed;
	else if (hasRed)
		combined = instMap + Instruction.INSTRUCTION_DELIM + instRed;
	else
		combined = instMap;

	//nothing to do unless at least one instruction uses the distributed cache
	if (combined == null || combined.trim().isEmpty() || !InstructionUtils.isDistributedCacheUsed(combined))
		return false;

	//gather the distinct cache indexes in order of first appearance
	ArrayList<Byte> cacheIndexes = new ArrayList<Byte>();
	for (String single : combined.split(Instruction.INSTRUCTION_DELIM)) {
		if (!InstructionUtils.isDistributedCacheUsed(single))
			continue;
		ArrayList<Byte> found = new ArrayList<Byte>();
		MRInstruction parsed = MRInstructionParser.parseSingleInstruction(single);
		if (parsed instanceof IDistributedCacheConsumer)
			((IDistributedCacheConsumer) parsed).addDistCacheIndex(single, found);
		//skip indexes already collected (prevent redundant add to distcache)
		for (Byte candidate : found) {
			if (!cacheIndexes.contains(candidate))
				cacheIndexes.add(candidate);
		}
	}

	//build the path list plus the delimited index and path strings
	ArrayList<String> cachePaths = new ArrayList<String>();
	StringBuilder indexConcat = new StringBuilder();
	StringBuilder pathConcat = new StringBuilder();
	for (int pos = 0; pos < cacheIndexes.size(); pos++) {
		byte ix = cacheIndexes.get(pos);
		if (pos > 0) {
			indexConcat.append(Instruction.INSTRUCTION_DELIM);
			pathConcat.append(Instruction.INSTRUCTION_DELIM);
		}
		String path = inputs[ix];
		cachePaths.add(path);
		indexConcat.append(ix);
		pathConcat.append(path);
	}

	//hand indexes and paths over to the job configuration
	MRJobConfiguration.setupDistCacheInputs(job, indexConcat.toString(), pathConcat.toString(), cachePaths);

	//in local mode, clear the in-memory cache (prevent job interference)
	boolean localMode = InfrastructureAnalyzer.isLocalMode(job);
	if (localMode)
		MRBaseForCommonInstructions.resetDistCache();
	return localMode;
}
Use of org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer in the project incubator-systemml by Apache.
Method getDistCacheOnlyInputs of the class GMR.
/**
 * Flags the indexes that the given instructions consume exclusively through the
 * distributed cache, i.e., indexes that would be redundant as regular job inputs.
 *
 * An index is flagged only if at least one instruction mentions it and every
 * instruction mentioning it reports it as a distributed-cache-only index.
 *
 * @param realIndexes indexes to probe
 * @param inst1 first instruction string; may be null or blank
 * @param inst2 second instruction string; may be null or blank
 * @param inst3 third instruction string; may be null or blank
 * @param inst4 fourth instruction string; may be null or blank
 * @return boolean array aligned with {@code realIndexes}; entry i is true if
 *         {@code realIndexes[i]} is used via distributed cache only
 * @throws DMLRuntimeException propagated from the calls made while parsing and
 *         querying the instructions
 */
private static boolean[] getDistCacheOnlyInputs(byte[] realIndexes, String inst1, String inst2, String inst3, String inst4) throws DMLRuntimeException {
	String[] categories = { inst1, inst2, inst3, inst4 };
	boolean[] result = new boolean[realIndexes.length];

	for (int pos = 0; pos < realIndexes.length; pos++) {
		byte ix = realIndexes[pos];
		//textual form used to probe instructions for this index
		//NOTE(review): this is a plain substring probe, so a shorter index can match
		//inside a longer one (e.g., 1 within 11) - confirm whether that matters here
		String operand = ix + Lop.DATATYPE_PREFIX + DataType.MATRIX.toString();
		boolean referenced = false;
		boolean cacheOnly = true;

		//scan every individual instruction of every instruction category
		for (String category : categories) {
			if (category == null || category.trim().isEmpty())
				continue;
			for (String single : category.split(Lop.INSTRUCTION_DELIMITOR)) {
				boolean viaCache = false;
				if (InstructionUtils.isDistributedCacheUsed(single)) {
					MRInstruction parsed = MRInstructionParser.parseSingleInstruction(single);
					if (parsed instanceof IDistributedCacheConsumer)
						viaCache = ((IDistributedCacheConsumer) parsed).isDistCacheOnlyIndex(single, ix);
				}
				if (single.contains(operand)) {
					referenced = true;
					//any mention outside the distributed cache disqualifies the index
					if (!viaCache)
						cacheOnly = false;
				}
			}
		}

		//require an actual use in order to account for write only jobs
		result[pos] = cacheOnly && referenced;
	}
	return result;
}
Aggregations