Use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by Apache.
Class MapperBase, method configure.
/**
 * Configures this mapper from the given job.
 * <p>
 * Reads the representative input matrix indexes, the input converter, the
 * data generation, mapper, reblock and CSV reblock instructions, and the
 * mapper output indexes from the job configuration. It then records, per
 * representative matrix, the dimensions, block sizes, index upper bounds and
 * last-block sizes, sets up the distributed cache files, and finally groups
 * the instructions and output indexes by representative matrix.
 *
 * @param job job configuration to read from
 * @throws RuntimeException wrapping any {@code IOException} or
 *         {@code DMLRuntimeException} raised while reading the configuration,
 *         or if more than one data generation instruction applies to one input
 */
public void configure(JobConf job) {
    super.configure(job);

    // one matrix file may appear several times within a statement
    try {
        representativeMatrixes = MRJobConfiguration.getInputMatrixIndexesInMapper(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }

    // the input converter is looked up via the first representative matrix
    inputConverter = MRJobConfiguration.getInputConverter(job, representativeMatrixes.get(0));

    // parse every instruction kind from the job configuration
    DataGenMRInstruction[] dataGenInsts;
    MRInstruction[] mapperInsts;
    ReblockInstruction[] reblockInsts;
    CSVReblockInstruction[] csvReblockInsts;
    try {
        dataGenInsts = MRJobConfiguration.getDataGenInstructions(job);
        mapperInsts = MRJobConfiguration.getInstructionsInMapper(job);
        reblockInsts = MRJobConfiguration.getReblockInstructions(job);
        csvReblockInsts = MRJobConfiguration.getCSVReblockInstructions(job);
    } catch (DMLRuntimeException dmle) {
        throw new RuntimeException(dmle);
    }

    // all output indexes produced in the mapper
    byte[] outputs = MRJobConfiguration.getOutputIndexesInMapper(job);

    final int numMatrices = representativeMatrixes.size();

    // dimensions of the representative matrices
    rlens = new long[numMatrices];
    clens = new long[numMatrices];
    for (int m = 0; m < numMatrices; m++) {
        byte matrixIndex = representativeMatrixes.get(m);
        rlens[m] = MRJobConfiguration.getNumRows(job, matrixIndex);
        clens[m] = MRJobConfiguration.getNumColumns(job, matrixIndex);
    }

    // block sizes of the representative matrices
    brlens = new int[numMatrices];
    bclens = new int[numMatrices];
    for (int m = 0; m < numMatrices; m++) {
        byte matrixIndex = representativeMatrixes.get(m);
        brlens[m] = MRJobConfiguration.getNumRowsPerBlock(job, matrixIndex);
        bclens[m] = MRJobConfiguration.getNumColumnsPerBlock(job, matrixIndex);
    }

    // upper boundaries for the keys and sizes of the last block per dimension
    rbounds = new long[numMatrices];
    cbounds = new long[numMatrices];
    lastblockrlens = new int[numMatrices];
    lastblockclens = new int[numMatrices];
    final boolean blockValues = valueClass.equals(MatrixBlock.class);
    for (int m = 0; m < numMatrices; m++) {
        if (blockValues) {
            // block input: bounds are block counts, last block may be partial
            rbounds[m] = (long) Math.ceil((double) rlens[m] / (double) brlens[m]);
            cbounds[m] = (long) Math.ceil((double) clens[m] / (double) bclens[m]);
            int rowRemainder = (int) (rlens[m] % brlens[m]);
            int colRemainder = (int) (clens[m] % bclens[m]);
            lastblockrlens[m] = (rowRemainder == 0) ? brlens[m] : rowRemainder;
            lastblockclens[m] = (colRemainder == 0) ? bclens[m] : colRemainder;
        } else {
            // non-block input: bounds are the matrix dimensions themselves
            rbounds[m] = rlens[m];
            cbounds[m] = clens[m];
            lastblockrlens[m] = 1;
            lastblockclens[m] = 1;
        }
    }

    // load data from distributed cache (if required, reuse if jvm_reuse)
    try {
        setupDistCacheFiles(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }

    // Group instructions per representative matrix. The set starts with the
    // matrix itself and grows with the output of every instruction accepted
    // for it, so later instructions can chain on earlier outputs.
    HashSet<Byte> reachable = new HashSet<Byte>();
    for (Byte representative : representativeMatrixes) {
        reachable.clear();
        reachable.add(representative);

        // relevant data generation instructions (at most one is allowed)
        ArrayList<DataGenMRInstruction> matchedDataGens = new ArrayList<DataGenMRInstruction>();
        if (dataGenInsts != null) {
            for (DataGenMRInstruction dataGen : dataGenInsts) {
                if (reachable.contains(dataGen.getInput())) {
                    matchedDataGens.add(dataGen);
                    reachable.add(dataGen.output);
                }
            }
        }
        if (matchedDataGens.size() > 1) {
            throw new RuntimeException("only expects at most one rand instruction per input");
        }
        dataGen_instructions.add(matchedDataGens.isEmpty() ? null : matchedDataGens.get(0));

        // relevant mapper instructions: any instruction with at least one
        // input index that is already reachable
        ArrayList<MRInstruction> matchedOps = new ArrayList<MRInstruction>();
        if (mapperInsts != null) {
            for (MRInstruction op : mapperInsts) {
                try {
                    boolean usesReachableInput = false;
                    for (byte input : op.getInputIndexes()) {
                        if (reachable.contains(input)) {
                            usesReachableInput = true;
                            break;
                        }
                    }
                    if (usesReachableInput) {
                        matchedOps.add(op);
                        reachable.add(op.output);
                    }
                } catch (DMLRuntimeException dmle) {
                    throw new RuntimeException(dmle);
                }
            }
        }
        mapper_instructions.add(matchedOps);

        // relevant reblock instructions
        ArrayList<ReblockInstruction> matchedReblocks = new ArrayList<ReblockInstruction>();
        if (reblockInsts != null) {
            for (ReblockInstruction rblk : reblockInsts) {
                if (reachable.contains(rblk.input)) {
                    matchedReblocks.add(rblk);
                    reachable.add(rblk.output);
                }
            }
        }
        reblock_instructions.add(matchedReblocks);

        // relevant CSV reblock instructions
        ArrayList<CSVReblockInstruction> matchedCsvReblocks = new ArrayList<CSVReblockInstruction>();
        if (csvReblockInsts != null) {
            for (CSVReblockInstruction csvRblk : csvReblockInsts) {
                if (reachable.contains(csvRblk.input)) {
                    matchedCsvReblocks.add(csvRblk);
                    reachable.add(csvRblk.output);
                }
            }
        }
        csv_reblock_instructions.add(matchedCsvReblocks);

        // output indexes that are reachable from this representative matrix
        ArrayList<Byte> matchedOutputs = new ArrayList<Byte>();
        for (byte output : outputs) {
            if (reachable.contains(output)) {
                matchedOutputs.add(output);
            }
        }
        outputIndexes.add(matchedOutputs);
    }
}
Use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by Apache.
Class ReblockMapper, method processReblockInMapperAndOutput.
/**
 * Applies the reblock instructions registered at the given position to the
 * currently cached values and appends the results to the per-output reblock
 * buffers, flushing to the collector as needed.
 *
 * @param index position into {@code reblock_instructions} selecting the instructions to run
 * @param out collector that receives flushed buffer contents
 * @throws IOException declared for the buffer append/flush calls
 */
protected void processReblockInMapperAndOutput(int index, OutputCollector<Writable, Writable> out) throws IOException {
    for (ReblockInstruction rblk : reblock_instructions.get(index)) {
        ArrayList<IndexedMatrixValue> cached = cachedValues.get(rblk.input);
        if (cached == null) {
            // nothing cached for this instruction's input
            continue;
        }

        for (IndexedMatrixValue entry : cached) {
            if (entry == null) {
                continue;
            }

            // look up the buffer for this output, creating it on first use
            ReblockBuffer target = buffer.get(rblk.output);
            if (target == null) {
                MatrixCharacteristics outDims = dimensionsOut.get(rblk.output);
                target = new ReblockBuffer(buffersize, outDims.getRows(), outDims.getCols(), rblk.brlen, rblk.bclen);
                buffer.put(rblk.output, target);
            }

            MatrixValue payload = entry.getValue();
            if (!(payload instanceof MatrixBlock)) {
                // any non-block value is treated as a single cell
                MatrixIndexes cellIx = entry.getIndexes();
                target.appendCell(cellIx.getRowIndex(), cellIx.getColumnIndex(), ((MatrixCell) payload).getValue());
                // flush once the buffer has reached its capacity
                if (target.getSize() >= target.getCapacity()) {
                    target.flushBuffer(rblk.output, out);
                }
            } else {
                // translate the 1-based block index into the 1-based cell
                // offset of the block's first row/column
                MatrixIndexes blockIx = entry.getIndexes();
                MatrixCharacteristics inDims = dimensionsIn.get(rblk.input);
                long rowOffset = (blockIx.getRowIndex() - 1) * inDims.getRowsPerBlock() + 1;
                long colOffset = (blockIx.getColumnIndex() - 1) * inDims.getColsPerBlock() + 1;
                // append the entire block; the collector is passed so the
                // buffer can flush on demand
                target.appendBlock(rowOffset, colOffset, (MatrixBlock) payload, rblk.output, out);
            }
        }
    }
}
Use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by Apache.
Class ReblockReducer, method configure.
/**
 * Configures this reducer: switches the job to the matrix value class
 * setting {@code true}, runs the parent configuration, and records the
 * matrix characteristics of every reblock instruction output.
 *
 * @param job job configuration to read from and update
 * @throws RuntimeException wrapping any exception raised while parsing the
 *         reblock instructions or looking up their characteristics
 */
@Override
public void configure(JobConf job) {
    MRJobConfiguration.setMatrixValueClass(job, true);
    super.configure(job);
    try {
        // register output characteristics for each parsed reblock instruction
        for (ReblockInstruction rblk : MRJobConfiguration.getReblockInstructions(job)) {
            dimensions.put(rblk.output, MRJobConfiguration.getMatrixCharactristicsForReblock(job, rblk.output));
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by Apache.
Class RunMRJobs, method executeInMemoryReblockOperations.
/**
 * Executes the given reblock instructions in memory instead of as an MR job.
 * <p>
 * For every parsed reblock instruction the input matrix block is acquired
 * for reading and handed, without copying, to each output matrix whose
 * result index equals the instruction's output. The characteristics of each
 * such output are taken from the block and the instruction's block sizes.
 * Entries of the returned characteristics array stay {@code null} for
 * outputs that no instruction produced.
 *
 * @param inst job instruction providing the result indexes and output infos
 * @param shuffleInst serialized reblock instructions to parse
 * @param inputMatrices input matrices, addressed by instruction input index
 * @param outputMatrices output matrices, aligned with the result indexes
 * @return job return holding the output characteristics, marked successful
 * @throws DMLRuntimeException if parsing or any acquire/release call fails
 */
private static JobReturn executeInMemoryReblockOperations(MRJobInstruction inst, String shuffleInst, MatrixObject[] inputMatrices, MatrixObject[] outputMatrices) throws DMLRuntimeException {
    MatrixCharacteristics[] mc = new MatrixCharacteristics[outputMatrices.length];
    ReblockInstruction[] rblkSet = MRInstructionParser.parseReblockInstructions(shuffleInst);
    byte[] results = inst.getIv_resultIndices();
    for (ReblockInstruction rblk : rblkSet) {
        //CP Reblock through caching framework (no copy required: same data, next op copies)
        MatrixObject input = inputMatrices[rblk.input];
        MatrixBlock mb = input.acquireRead();
        try {
            for (int i = 0; i < results.length; i++) {
                if (rblk.output == results[i]) {
                    outputMatrices[i].acquireModify(mb);
                    outputMatrices[i].release();
                    mc[i] = new MatrixCharacteristics(mb.getNumRows(), mb.getNumColumns(), rblk.brlen, rblk.bclen, mb.getNonZeros());
                }
            }
        } finally {
            // always pair the read acquire with a release, even if handing
            // the block to an output fails
            input.release();
        }
    }
    return new JobReturn(mc, inst.getOutputInfos(), true);
}
Use of org.apache.sysml.runtime.instructions.mr.ReblockInstruction in project incubator-systemml by Apache.
Class CSVReblockReducer, method configure.
/**
 * Configures this reducer: switches the job to the matrix value class
 * setting {@code true}, runs the parent configuration, and records the
 * matrix characteristics of every CSV reblock instruction output.
 *
 * @param job job configuration to read from and update
 * @throws RuntimeException wrapping a {@code DMLRuntimeException} raised
 *         while parsing the CSV reblock instructions
 */
@Override
public void configure(JobConf job) {
    MRJobConfiguration.setMatrixValueClass(job, true);
    super.configure(job);

    // parse the CSV reblock instructions
    CSVReblockInstruction[] csvReblocks;
    try {
        csvReblocks = MRJobConfiguration.getCSVReblockInstructions(job);
    } catch (DMLRuntimeException dmle) {
        throw new RuntimeException(dmle);
    }

    // register output characteristics; each element is handled through its
    // ReblockInstruction view
    for (int k = 0; k < csvReblocks.length; k++) {
        ReblockInstruction rblk = csvReblocks[k];
        dimensions.put(rblk.output, MRJobConfiguration.getMatrixCharactristicsForReblock(job, rblk.output));
    }
}
Aggregations