Search in sources :

Example 1 with IDistributedCacheConsumer

use of org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer in project incubator-systemml by apache.

the class GMR method setupDistributedCache.

/**
 * Registers the inputs that mapper/reducer instructions read through the
 * distributed cache with the given job configuration.
 *
 * The non-blank instruction strings are concatenated, every single instruction
 * that uses the distributed cache is parsed, and the distinct input indexes it
 * reports are collected. The indexes and the matching entries of {@code inputs}
 * are then handed to {@code MRJobConfiguration.setupDistCacheInputs}.
 *
 * @param job job configuration to set up
 * @param instMap mapper instructions (may be null or blank)
 * @param instRed reducer instructions (may be null or blank)
 * @param inputs input paths, addressed by the collected cache indexes
 * @param rlens not read by this method
 * @param clens not read by this method
 * @return true only if distributed cache inputs were configured and the job
 *         runs in local mode (the in-memory cache is reset in that case);
 *         false otherwise
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private static boolean setupDistributedCache(JobConf job, String instMap, String instRed, String[] inputs, long[] rlens, long[] clens) throws DMLRuntimeException {
    // merge mapper and reducer instructions into one delimited string
    final boolean hasMapInsts = instMap != null && !instMap.trim().isEmpty();
    final boolean hasRedInsts = instRed != null && !instRed.trim().isEmpty();
    final String combined;
    if (!hasMapInsts) {
        combined = instRed;
    } else if (hasRedInsts) {
        combined = instMap + Instruction.INSTRUCTION_DELIM + instRed;
    } else {
        combined = instMap;
    }

    // nothing to do unless at least one instruction uses the distributed cache
    if (combined == null || combined.trim().isEmpty() || !InstructionUtils.isDistributedCacheUsed(combined)) {
        return false;
    }

    // gather the distinct distributed cache input indexes, in order of first use
    ArrayList<Byte> cacheIndexes = new ArrayList<Byte>();
    for (String single : combined.split(Instruction.INSTRUCTION_DELIM)) {
        if (!InstructionUtils.isDistributedCacheUsed(single)) {
            continue;
        }
        ArrayList<Byte> reported = new ArrayList<Byte>();
        MRInstruction parsed = MRInstructionParser.parseSingleInstruction(single);
        if (parsed instanceof IDistributedCacheConsumer) {
            ((IDistributedCacheConsumer) parsed).addDistCacheIndex(single, reported);
        }
        // skip indexes already seen (prevent redundant add to distcache)
        for (Byte candidate : reported) {
            if (!cacheIndexes.contains(candidate)) {
                cacheIndexes.add(candidate);
            }
        }
    }

    // build the path list plus the delimited index and path strings
    ArrayList<String> cachePaths = new ArrayList<String>();
    StringBuilder joinedIndexes = new StringBuilder();
    StringBuilder joinedPaths = new StringBuilder();
    for (int pos = 0; pos < cacheIndexes.size(); pos++) {
        byte inputIx = cacheIndexes.get(pos);
        if (pos > 0) {
            joinedIndexes.append(Instruction.INSTRUCTION_DELIM);
            joinedPaths.append(Instruction.INSTRUCTION_DELIM);
        }
        String inputPath = inputs[inputIx];
        cachePaths.add(inputPath);
        joinedIndexes.append(inputIx);
        joinedPaths.append(inputPath);
    }

    // configure mr job with distcache indexes
    MRJobConfiguration.setupDistCacheInputs(job, joinedIndexes.toString(), joinedPaths.toString(), cachePaths);

    // outside local mode there is no in-memory cache to clean
    if (!InfrastructureAnalyzer.isLocalMode(job)) {
        return false;
    }
    // clean in-memory cache (prevent job interference in local mode)
    MRBaseForCommonInstructions.resetDistCache();
    return true;
}
Also used : IDistributedCacheConsumer(org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer) ArrayList(java.util.ArrayList) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction)

Example 2 with IDistributedCacheConsumer

use of org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer in project incubator-systemml by apache.

the class GMR method getDistCacheOnlyInputs.

/**
 * Determines, for each given index, whether it is referenced by the
 * instructions exclusively through the distributed cache, in which case it
 * would be a redundant job input.
 *
 * @param realIndexes array of byte indexes to check
 * @param inst1 instruction 1 (may be null or blank)
 * @param inst2 instruction 2 (may be null or blank)
 * @param inst3 instruction 3 (may be null or blank)
 * @param inst4 instruction 4 (may be null or blank)
 * @return one flag per entry of {@code realIndexes}: true if the index is
 *         referenced by at least one instruction and every referencing
 *         instruction reports it as a distributed-cache-only index
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private static boolean[] getDistCacheOnlyInputs(byte[] realIndexes, String inst1, String inst2, String inst3, String inst4) throws DMLRuntimeException {
    final String[] categories = new String[] { inst1, inst2, inst3, inst4 };
    final boolean[] flags = new boolean[realIndexes.length];

    for (int pos = 0; pos < flags.length; pos++) {
        byte index = realIndexes[pos];
        // operand text probed for in each instruction
        // NOTE(review): this is a plain substring match, so a longer index ending in
        // the same digits might also match - confirm against the operand encoding
        String needle = index + Lop.DATATYPE_PREFIX + DataType.MATRIX.toString();
        boolean onlyViaCache = true;
        boolean referenced = false;

        // for all instruction categories
        for (String category : categories) {
            if (category == null || category.trim().isEmpty()) {
                continue;
            }
            // for each individual instruction
            for (String single : category.split(Lop.INSTRUCTION_DELIMITOR)) {
                boolean cacheOnlyHere = false;
                if (InstructionUtils.isDistributedCacheUsed(single)) {
                    MRInstruction parsed = MRInstructionParser.parseSingleInstruction(single);
                    if (parsed instanceof IDistributedCacheConsumer) {
                        cacheOnlyHere = ((IDistributedCacheConsumer) parsed).isDistCacheOnlyIndex(single, index);
                    }
                }
                boolean mentioned = single.contains(needle);
                if (mentioned) {
                    referenced = true;
                    // a reference that is not cache-only disqualifies the index
                    if (!cacheOnlyHere) {
                        onlyViaCache = false;
                    }
                }
            }
        }

        // probe for use in order to account for write only jobs
        flags[pos] = onlyViaCache && referenced;
    }
    return flags;
}
Also used : IDistributedCacheConsumer(org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction)

Aggregations

IDistributedCacheConsumer (org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer): 2; MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction): 2; ArrayList (java.util.ArrayList): 1