Search in sources :

Example 1 with ReblockInstruction

use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by apache.

Class MapperBase, method configure.

/**
 * Initializes the mapper from the job configuration.
 * <p>
 * Reads the representative input matrix indexes, the input converter, the
 * matrix dimensions and block sizes, derives the per-matrix index bounds and
 * last-block sizes, sets up the distributed cache files, and finally records,
 * for every representative matrix, the datagen / mapper / reblock / CSV reblock
 * instructions and the output indexes that are reachable from that matrix.
 *
 * @param job the job configuration to read from
 * @throws RuntimeException wrapping any {@link IOException} or
 *         {@link DMLRuntimeException} raised while reading the configuration,
 *         or if more than one datagen instruction applies to a single input
 */
public void configure(JobConf job) {
    super.configure(job);

    //one matrix file can occur multiple times in a statement
    try {
        representativeMatrixes = MRJobConfiguration.getInputMatrixIndexesInMapper(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }

    //the input converter is looked up via the first representative matrix
    inputConverter = MRJobConfiguration.getInputConverter(job, representativeMatrixes.get(0));

    //parse every kind of instruction that may apply to the matrices of this file
    DataGenMRInstruction[] dataGenCandidates;
    MRInstruction[] mapperCandidates;
    ReblockInstruction[] reblockCandidates;
    CSVReblockInstruction[] csvReblockCandidates;
    try {
        dataGenCandidates = MRJobConfiguration.getDataGenInstructions(job);
        mapperCandidates = MRJobConfiguration.getInstructionsInMapper(job);
        reblockCandidates = MRJobConfiguration.getReblockInstructions(job);
        csvReblockCandidates = MRJobConfiguration.getCSVReblockInstructions(job);
    } catch (DMLRuntimeException dmle) {
        throw new RuntimeException(dmle);
    }

    //all output indexes produced in the mapper
    byte[] outputs = MRJobConfiguration.getOutputIndexesInMapper(job);

    final int count = representativeMatrixes.size();

    //dimensions of the representative matrices
    rlens = new long[count];
    clens = new long[count];
    for (int k = 0; k < count; k++) {
        Byte matrixIndex = representativeMatrixes.get(k);
        rlens[k] = MRJobConfiguration.getNumRows(job, matrixIndex);
        clens[k] = MRJobConfiguration.getNumColumns(job, matrixIndex);
    }

    //block sizes of the representative matrices
    brlens = new int[count];
    bclens = new int[count];
    for (int k = 0; k < count; k++) {
        Byte matrixIndex = representativeMatrixes.get(k);
        brlens[k] = MRJobConfiguration.getNumRowsPerBlock(job, matrixIndex);
        bclens[k] = MRJobConfiguration.getNumColumnsPerBlock(job, matrixIndex);
    }

    //upper boundaries for key value pairs and sizes of the trailing blocks
    rbounds = new long[count];
    cbounds = new long[count];
    lastblockrlens = new int[count];
    lastblockclens = new int[count];
    final boolean blockValues = valueClass.equals(MatrixBlock.class);
    for (int k = 0; k < count; k++) {
        if (blockValues) {
            //bounds are counted in blocks: ceil(length / blocksize)
            rbounds[k] = (long) Math.ceil((double) rlens[k] / (double) brlens[k]);
            cbounds[k] = (long) Math.ceil((double) clens[k] / (double) bclens[k]);
            //a remainder of zero means the last block is a full block
            int rowRemainder = (int) (rlens[k] % brlens[k]);
            lastblockrlens[k] = (rowRemainder == 0) ? brlens[k] : rowRemainder;
            int colRemainder = (int) (clens[k] % bclens[k]);
            lastblockclens[k] = (colRemainder == 0) ? bclens[k] : colRemainder;
        } else {
            //non-block values: bounds equal the matrix dimensions
            rbounds[k] = rlens[k];
            cbounds[k] = clens[k];
            lastblockrlens[k] = 1;
            lastblockclens[k] = 1;
        }
    }

    //load data from distributed cache (if required, reuse if jvm_reuse)
    try {
        setupDistCacheFiles(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }

    //indexes reachable from the current representative matrix; every selected
    //instruction contributes its output so that later instructions can chain on it
    HashSet<Byte> reachable = new HashSet<Byte>();
    for (Byte representative : representativeMatrixes) {
        reachable.clear();
        reachable.add(representative);

        //relevant datagen instructions for this representative matrix
        ArrayList<DataGenMRInstruction> matchingDataGens = new ArrayList<DataGenMRInstruction>();
        if (dataGenCandidates != null) {
            for (DataGenMRInstruction candidate : dataGenCandidates) {
                if (reachable.contains(candidate.getInput())) {
                    matchingDataGens.add(candidate);
                    reachable.add(candidate.output);
                }
            }
        }
        if (matchingDataGens.size() > 1) {
            throw new RuntimeException("only expects at most one rand instruction per input");
        }
        //exactly one slot per representative matrix, null if there is no datagen
        dataGen_instructions.add(matchingDataGens.isEmpty() ? null : matchingDataGens.get(0));

        //relevant mapper instructions: selected as soon as any one input is reachable
        ArrayList<MRInstruction> matchingOps = new ArrayList<MRInstruction>();
        if (mapperCandidates != null) {
            for (MRInstruction candidate : mapperCandidates) {
                try {
                    for (byte in : candidate.getInputIndexes()) {
                        if (reachable.contains(in)) {
                            matchingOps.add(candidate);
                            reachable.add(candidate.output);
                            break;
                        }
                    }
                } catch (DMLRuntimeException dmle) {
                    throw new RuntimeException(dmle);
                }
            }
        }
        mapper_instructions.add(matchingOps);

        //relevant reblock instructions for this representative matrix
        ArrayList<ReblockInstruction> matchingReblocks = new ArrayList<ReblockInstruction>();
        if (reblockCandidates != null) {
            for (ReblockInstruction candidate : reblockCandidates) {
                if (reachable.contains(candidate.input)) {
                    matchingReblocks.add(candidate);
                    reachable.add(candidate.output);
                }
            }
        }
        reblock_instructions.add(matchingReblocks);

        //relevant CSV reblock instructions for this representative matrix
        ArrayList<CSVReblockInstruction> matchingCsvReblocks = new ArrayList<CSVReblockInstruction>();
        if (csvReblockCandidates != null) {
            for (CSVReblockInstruction candidate : csvReblockCandidates) {
                if (reachable.contains(candidate.input)) {
                    matchingCsvReblocks.add(candidate);
                    reachable.add(candidate.output);
                }
            }
        }
        csv_reblock_instructions.add(matchingCsvReblocks);

        //output indexes that are reachable from this representative matrix
        ArrayList<Byte> matchingOutputs = new ArrayList<Byte>();
        for (byte candidate : outputs) {
            if (reachable.contains(candidate)) {
                matchingOutputs.add(candidate);
            }
        }
        outputIndexes.add(matchingOutputs);
    }
}
Also used : CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) HashSet(java.util.HashSet)

Example 2 with ReblockInstruction

use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by apache.

Class ReblockMapper, method processReblockInMapperAndOutput.

/**
 * Feeds the cached values of every reblock instruction registered at
 * {@code index} into the reblock buffer of the instruction's output.
 * <p>
 * Buffers are created on first use from the output characteristics in
 * {@code dimensionsOut}. Block values are appended as whole blocks; any other
 * value is treated as a single cell, and the buffer is flushed to {@code out}
 * once its size reaches its capacity.
 *
 * @param index position in {@code reblock_instructions} to process
 * @param out   collector that receives flushed buffer contents
 * @throws IOException declared by this method; presumably raised while
 *         flushing to the collector
 */
protected void processReblockInMapperAndOutput(int index, OutputCollector<Writable, Writable> out) throws IOException {
    for (ReblockInstruction rblk : reblock_instructions.get(index)) {
        ArrayList<IndexedMatrixValue> cached = cachedValues.get(rblk.input);
        if (cached == null) {
            //nothing cached for the input of this instruction
            continue;
        }
        for (IndexedMatrixValue entry : cached) {
            if (entry == null) {
                continue;
            }
            //fetch the buffer of the output, creating it on first use
            ReblockBuffer target = buffer.get(rblk.output);
            if (target == null) {
                MatrixCharacteristics outDims = dimensionsOut.get(rblk.output);
                target = new ReblockBuffer(buffersize, outDims.getRows(), outDims.getCols(), rblk.brlen, rblk.bclen);
                buffer.put(rblk.output, target);
            }
            MatrixValue value = entry.getValue();
            if (!(value instanceof MatrixBlock)) {
                //cell input: append the single cell, then flush if the buffer is full
                target.appendCell(entry.getIndexes().getRowIndex(), entry.getIndexes().getColumnIndex(), ((MatrixCell) value).getValue());
                if (target.getSize() >= target.getCapacity()) {
                    target.flushBuffer(rblk.output, out);
                }
                continue;
            }
            //block input: convert the 1-based block index into 1-based cell offsets
            MatrixIndexes blockIx = entry.getIndexes();
            MatrixCharacteristics inDims = dimensionsIn.get(rblk.input);
            long rowOffset = (blockIx.getRowIndex() - 1) * inDims.getRowsPerBlock() + 1;
            long colOffset = (blockIx.getColumnIndex() - 1) * inDims.getColsPerBlock() + 1;
            //append entire block incl. flush on demand
            target.appendBlock(rowOffset, colOffset, (MatrixBlock) value, rblk.output, out);
        }
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 3 with ReblockInstruction

use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by apache.

Class ReblockReducer, method configure.

/**
 * Configures the reducer and records, for the output of every reblock
 * instruction of the job, the matrix characteristics used for reblocking.
 *
 * @param job the job configuration to read from
 * @throws RuntimeException wrapping any exception raised while parsing the
 *         reblock instructions or looking up their characteristics
 */
@Override
public void configure(JobConf job) {
    MRJobConfiguration.setMatrixValueClass(job, true);
    super.configure(job);
    try {
        //parse the reblock instructions and register the characteristics of each output
        for (ReblockInstruction rblk : MRJobConfiguration.getReblockInstructions(job)) {
            dimensions.put(rblk.output, MRJobConfiguration.getMatrixCharactristicsForReblock(job, rblk.output));
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException)

Example 4 with ReblockInstruction

use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by apache.

Class RunMRJobs, method executeInMemoryReblockOperations.

/**
 * Executes the reblock instructions encoded in {@code shuffleInst} in memory
 * by handing the block of each input matrix directly to the matching outputs.
 * <p>
 * For every reblock instruction the input block is acquired for reading and
 * assigned to each output whose result index equals the instruction's output;
 * the characteristics of that output are recorded with the instruction's
 * block sizes. Outputs without a matching instruction keep a {@code null}
 * entry in the returned characteristics.
 *
 * @param inst           job instruction supplying result indices and output infos
 * @param shuffleInst    encoded reblock instructions to parse
 * @param inputMatrices  input matrices, indexed by the instruction's input index
 * @param outputMatrices output matrices, positionally aligned with the result indices
 * @return a successful job return holding the output characteristics
 * @throws DMLRuntimeException if parsing or any acquire/release operation fails
 */
private static JobReturn executeInMemoryReblockOperations(MRJobInstruction inst, String shuffleInst, MatrixObject[] inputMatrices, MatrixObject[] outputMatrices) throws DMLRuntimeException {
    MatrixCharacteristics[] mc = new MatrixCharacteristics[outputMatrices.length];
    ReblockInstruction[] rblkSet = MRInstructionParser.parseReblockInstructions(shuffleInst);
    byte[] results = inst.getIv_resultIndices();
    for (ReblockInstruction rblk : rblkSet) {
        //CP Reblock through caching framework (no copy required: same data, next op copies)
        MatrixObject input = inputMatrices[rblk.input];
        MatrixBlock mb = input.acquireRead();
        try {
            for (int i = 0; i < results.length; i++) {
                if (rblk.output == results[i]) {
                    outputMatrices[i].acquireModify(mb);
                    outputMatrices[i].release();
                    mc[i] = new MatrixCharacteristics(mb.getNumRows(), mb.getNumColumns(), rblk.brlen, rblk.bclen, mb.getNonZeros());
                }
            }
        } finally {
            //always give the input back, even if updating an output failed;
            //otherwise the input would stay acquired for reading
            input.release();
        }
    }
    return new JobReturn(mc, inst.getOutputInfos(), true);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 5 with ReblockInstruction

use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by apache.

Class CSVReblockReducer, method configure.

/**
 * Configures the reducer and records, for the output of every CSV reblock
 * instruction of the job, the matrix characteristics used for reblocking.
 *
 * @param job the job configuration to read from
 * @throws RuntimeException wrapping a {@link DMLRuntimeException} raised
 *         while parsing the CSV reblock instructions
 */
@Override
public void configure(JobConf job) {
    MRJobConfiguration.setMatrixValueClass(job, true);
    super.configure(job);

    //parse the CSV reblock instructions
    CSVReblockInstruction[] csvReblocks;
    try {
        csvReblocks = MRJobConfiguration.getCSVReblockInstructions(job);
    } catch (DMLRuntimeException dmle) {
        throw new RuntimeException(dmle);
    }

    //register the characteristics of each instruction's output
    for (int k = 0; k < csvReblocks.length; k++) {
        ReblockInstruction rblk = csvReblocks[k];
        dimensions.put(rblk.output, MRJobConfiguration.getMatrixCharactristicsForReblock(job, rblk.output));
    }
}
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

ReblockInstruction (org.apache.sysml.runtime.instructions.mr.ReblockInstruction)8 IOException (java.io.IOException)3 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)3 CSVReblockInstruction (org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction)3 DataGenMRInstruction (org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction)3 PMMJMRInstruction (org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction)3 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)3 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 AggregateBinaryInstruction (org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction)2 AggregateInstruction (org.apache.sysml.runtime.instructions.mr.AggregateInstruction)2 BinaryMInstruction (org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)2 CM_N_COVInstruction (org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction)2 GroupedAggregateInstruction (org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction)2 MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction)2 MapMultChainInstruction (org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction)2 RemoveEmptyMRInstruction (org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction)2 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)2 HashMap (java.util.HashMap)1 TreeMap (java.util.TreeMap)1