use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.
the class RunMRJobs method executeInMemoryDataGenOperations.
/**
 * Runs the given data generation instructions (rand and seq) directly in
 * memory and binds every generated block to each output matrix whose result
 * index equals the output index of the generating instruction.
 *
 * @param inst MR job instruction that provides the result indices and output infos
 * @param randInst data generation instructions as a string
 * @param outputMatrices output matrix objects, addressed by the same position as the result indices
 * @return job return with the characteristics of the generated matrices, always flagged as successful
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] characteristics = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] parsed = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIndices = inst.getIv_resultIndices();

    for (DataGenMRInstruction dgInst : parsed) {
        MatrixBlock generated;
        if (dgInst instanceof RandInstruction) {
            // CP Rand block operation
            RandInstruction rand = (RandInstruction) dgInst;
            RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
                rand.getProbabilityDensityFunction(),
                (int) rand.getRows(), (int) rand.getCols(),
                rand.getRowsInBlock(), rand.getColsInBlock(),
                rand.getSparsity(), rand.getMinValue(), rand.getMaxValue(),
                rand.getPdfParams());
            generated = MatrixBlock.randOperations(generator, rand.getSeed());
        } else if (dgInst instanceof SeqInstruction) {
            SeqInstruction seq = (SeqInstruction) dgInst;
            generated = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);
        } else {
            // other instruction types are ignored
            continue;
        }

        // hand the block over to every output that is fed by this instruction
        for (int pos = 0; pos < resultIndices.length; pos++) {
            if (dgInst.output != resultIndices[pos]) {
                continue;
            }
            outputMatrices[pos].acquireModify(generated);
            outputMatrices[pos].release();
            characteristics[pos] = new MatrixCharacteristics(generated.getNumRows(), generated.getNumColumns(), dgInst.getRowsInBlock(), dgInst.getColsInBlock(), generated.getNonZeros());
        }
    }
    return new JobReturn(characteristics, inst.getOutputInfos(), true);
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.
the class DataGenMR method runJob.
/**
 * <p>Starts a DataGen MapReduce job which will produce one or more generated
 * (rand or seq) objects.</p>
 *
 * <p>For every data generation instruction a temporary text input file is
 * written with one line per output block: for rand instructions the block
 * indexes, the block dimensions and a per-block seed; for seq instructions
 * the block indexes and the from/to/increment values of that block. All
 * temporary input files written so far are deleted before this method
 * returns, whether it succeeds or fails.</p>
 *
 * @param inst MR job instruction
 * @param dataGenInstructions array of data gen instructions
 * @param instructionsInMapper instructions in mapper
 * @param aggInstructionsInReducer aggregate instructions in reducer
 * @param otherInstructionsInReducer other instructions in reducer
 * @param numReducers number of reducers
 * @param replication file replication
 * @param resultIndexes result indexes for each generated object
 * @param dimsUnknownFilePrefix file path prefix when dimensions unknown
 * @param outputs output file for each generated object
 * @param outputInfos output information for each generated object
 * @return job return with the matrix characteristics for each generated object and the job success flag
 * @throws Exception if Exception occurs
 */
public static JobReturn runJob(MRJobInstruction inst, String[] dataGenInstructions, String instructionsInMapper, String aggInstructionsInReducer, String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes, String dimsUnknownFilePrefix, String[] outputs, OutputInfo[] outputInfos) throws Exception {
    JobConf job = new JobConf(DataGenMR.class);
    job.setJobName("DataGen-MR");
    // whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClass(job, true);
    byte[] realIndexes = new byte[dataGenInstructions.length];
    for (byte b = 0; b < realIndexes.length; b++) {
        realIndexes[b] = b;
    }
    String[] inputs = new String[dataGenInstructions.length];
    InputInfo[] inputInfos = new InputInfo[dataGenInstructions.length];
    long[] rlens = new long[dataGenInstructions.length];
    long[] clens = new long[dataGenInstructions.length];
    int[] brlens = new int[dataGenInstructions.length];
    int[] bclens = new int[dataGenInstructions.length];
    FileSystem fs = FileSystem.get(job);
    // delimiter-separated list of all data gen instructions
    StringBuilder dataGenInsBuilder = new StringBuilder();
    int numblocks = 0;
    int maxbrlen = -1, maxbclen = -1;
    double maxsparsity = -1;
    RunningJob runjob;
    MatrixCharacteristics[] stats;
    // the input generation runs inside the try so that files written before a failure are cleaned up as well
    try {
        for (int i = 0; i < dataGenInstructions.length; i++) {
            if (i > 0) {
                dataGenInsBuilder.append(Lop.INSTRUCTION_DELIMITOR);
            }
            dataGenInsBuilder.append(dataGenInstructions[i]);
            MRInstruction mrins = MRInstructionParser.parseSingleInstruction(dataGenInstructions[i]);
            MRType mrtype = mrins.getMRInstructionType();
            DataGenMRInstruction genInst = (DataGenMRInstruction) mrins;
            rlens[i] = genInst.getRows();
            clens[i] = genInst.getCols();
            brlens[i] = genInst.getRowsInBlock();
            bclens[i] = genInst.getColsInBlock();
            maxbrlen = Math.max(maxbrlen, brlens[i]);
            maxbclen = Math.max(maxbclen, bclens[i]);
            if (mrtype == MRType.Rand) {
                RandInstruction randInst = (RandInstruction) mrins;
                inputs[i] = LibMatrixDatagen.generateUniqueSeedPath(genInst.getBaseDir());
                maxsparsity = Math.max(maxsparsity, randInst.getSparsity());
                try (PrintWriter pw = new PrintWriter(fs.create(new Path(inputs[i])))) {
                    // for obj reuse and preventing repeated buffer re-allocations
                    StringBuilder sb = new StringBuilder();
                    // seed generation
                    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(randInst.getSeed());
                    for (long r = 0; r < Math.max(rlens[i], 1); r += brlens[i]) {
                        long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));
                        for (long c = 0; c < Math.max(clens[i], 1); c += bclens[i]) {
                            long curBlockColSize = Math.min(bclens[i], (clens[i] - c));
                            // line format: blockRowIndex,blockColIndex,blockRows,blockCols,seed
                            sb.append((r / brlens[i]) + 1);
                            sb.append(',');
                            sb.append((c / bclens[i]) + 1);
                            sb.append(',');
                            sb.append(curBlockRowSize);
                            sb.append(',');
                            sb.append(curBlockColSize);
                            sb.append(',');
                            sb.append(bigrand.nextLong());
                            pw.println(sb.toString());
                            sb.setLength(0);
                            numblocks++;
                        }
                    }
                    // PrintWriter never throws on write failures, so check explicitly
                    if (pw.checkError())
                        throw new DMLRuntimeException("Failed to write rand seed input file: " + inputs[i]);
                }
                inputInfos[i] = InputInfo.TextCellInputInfo;
            } else if (mrtype == MRType.Seq) {
                SeqInstruction seqInst = (SeqInstruction) mrins;
                // NOTE(review): the name is only unique per millisecond; two seq instructions with the
                // same base dir processed within one millisecond would collide - confirm whether this can happen
                inputs[i] = genInst.getBaseDir() + System.currentTimeMillis() + ".seqinput";
                // always dense
                maxsparsity = 1.0;
                double from = seqInst.fromValue;
                double to = seqInst.toValue;
                double incr = seqInst.incrValue;
                // handle default 1 to -1 for special case of from>to
                incr = LibMatrixDatagen.updateSeqIncr(from, to, incr);
                // Correctness checks on (from, to, incr)
                boolean neg = (from > to);
                if (incr == 0)
                    throw new DMLRuntimeException("Invalid value for \"increment\" in seq().");
                if (neg != (incr < 0))
                    throw new DMLRuntimeException("Wrong sign for the increment in a call to seq()");
                // Compute the number of rows in the sequence
                long numrows = UtilFunctions.getSeqLength(from, to, incr);
                if (rlens[i] > 0) {
                    if (numrows != rlens[i])
                        throw new DMLRuntimeException("Unexpected error while processing sequence instruction. Expected number of rows does not match given number: " + rlens[i] + " != " + numrows);
                } else {
                    rlens[i] = numrows;
                }
                if (clens[i] > 0 && clens[i] != 1)
                    throw new DMLRuntimeException("Unexpected error while processing sequence instruction. Number of columns (" + clens[i] + ") must be equal to 1.");
                else
                    clens[i] = 1;
                try (PrintWriter pw = new PrintWriter(fs.create(new Path(inputs[i])))) {
                    StringBuilder sb = new StringBuilder();
                    double temp = from;
                    double block_from, block_to;
                    for (long r = 0; r < rlens[i]; r += brlens[i]) {
                        long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));
                        // block (bid_i,bid_j) generates a sequence from the interval [block_from, block_to] (inclusive of both end points of the interval)
                        long bid_i = ((r / brlens[i]) + 1);
                        long bid_j = 1;
                        block_from = temp;
                        block_to = temp + (curBlockRowSize - 1) * incr;
                        // next block starts from here
                        temp = block_to + incr;
                        // line format: blockRowIndex,blockColIndex,from,to,incr
                        sb.append(bid_i);
                        sb.append(',');
                        sb.append(bid_j);
                        sb.append(',');
                        sb.append(block_from);
                        sb.append(',');
                        sb.append(block_to);
                        sb.append(',');
                        sb.append(incr);
                        pw.println(sb.toString());
                        sb.setLength(0);
                        numblocks++;
                    }
                    // PrintWriter never throws on write failures, so check explicitly
                    if (pw.checkError())
                        throw new DMLRuntimeException("Failed to write seq input file: " + inputs[i]);
                }
                inputInfos[i] = InputInfo.TextCellInputInfo;
            } else {
                throw new DMLRuntimeException("Unexpected Data Generation Instruction Type: " + mrtype);
            }
        }
        String dataGenInsStr = dataGenInsBuilder.toString();
        // set up the block size
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
        // set up the input files and their format information
        MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false, ConvertTarget.BLOCK);
        // set up the dimensions of input matrices
        MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
        MRJobConfiguration.setDimsUnknownFilePrefix(job, dimsUnknownFilePrefix);
        // set up the block size
        // NOTE(review): same call and arguments as above; looks redundant - confirm before removing
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
        // set up the rand Instructions
        MRJobConfiguration.setRandInstructions(job, dataGenInsStr);
        // set up unary instructions that will perform in the mapper
        MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
        // set up the aggregate instructions that will happen in the combiner and reducer
        MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);
        // set up the instructions that will happen in the reducer, after the aggregation instructions
        MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);
        // set up the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
        // set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        // set up custom map/reduce configurations
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
        // determine degree of parallelism (nmappers: 1<=n<=capacity)
        // TODO use maxsparsity whenever we have a way of generating sparse rand data
        int capacity = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        long dfsblocksize = InfrastructureAnalyzer.getHDFSBlockSize();
        // correction max number of mappers on yarn clusters
        if (InfrastructureAnalyzer.isYarnEnabled())
            capacity = (int) Math.max(capacity, YarnClusterAnalyzer.getNumCores());
        // 8 bytes per cell; computed in long from the first factor on to avoid int overflow for large blocks
        long denseBytes = 8L * maxbrlen * maxbclen * numblocks;
        int nmapers = (int) Math.max(Math.min(denseBytes / dfsblocksize, capacity), 1);
        job.setNumMapTasks(nmapers);
        // set up what matrices are needed to pass from the mapper to reducer
        HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, otherInstructionsInReducer, resultIndexes);
        MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, null, otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
        stats = ret.stats;
        // set up the number of reducers
        MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);
        // print the complete MRJob instruction
        if (LOG.isTraceEnabled())
            inst.printCompleteMRJobInstruction(stats);
        // Update resultDimsUnknown based on computed "stats"
        byte[] resultDimsUnknown = new byte[resultIndexes.length];
        for (int i = 0; i < resultIndexes.length; i++) {
            if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
                resultDimsUnknown[i] = (byte) 1;
            } else {
                resultDimsUnknown[i] = (byte) 0;
            }
        }
        // a missing mapper instruction string cannot contain either operation
        boolean mayContainCtable = instructionsInMapper != null && (instructionsInMapper.contains("ctabletransform") || instructionsInMapper.contains("groupedagg"));
        // set up the multiple output files, and their format information
        MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true, mayContainCtable);
        // configure mapper and the mapper output key value pairs
        job.setMapperClass(DataGenMapper.class);
        if (numReducers == 0) {
            job.setMapOutputKeyClass(Writable.class);
            job.setMapOutputValueClass(Writable.class);
        } else {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixBlock.class);
        }
        // set up combiner
        if (numReducers != 0 && aggInstructionsInReducer != null && !aggInstructionsInReducer.isEmpty())
            job.setCombinerClass(GMRCombiner.class);
        // configure reducer
        job.setReducerClass(GMRReducer.class);
        // set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);
        runjob = JobClient.runJob(job);
        /* Process different counters */
        Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
        for (int i = 0; i < resultIndexes.length; i++) {
            // number of non-zeros
            stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        }
        String dir = dimsUnknownFilePrefix + "/" + runjob.getID().toString() + "_dimsFile";
        stats = MapReduceTool.processDimsFiles(dir, stats);
        MapReduceTool.deleteFileIfExistOnHDFS(dir);
    } finally {
        // entries are still null for instructions that were never reached
        for (String input : inputs) {
            if (input != null) {
                MapReduceTool.deleteFileIfExistOnHDFS(new Path(input), job);
            }
        }
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.
the class DataGenMapper method map.
/**
 * Generates one matrix block per data generation instruction from a single
 * input record, puts it into the value cache and hands it to the special
 * map operations.
 *
 * <p>The record is a comma-separated line of five fields. For rand
 * instructions they are read as block row index, block column index, block
 * row size, block column size and seed; for seq instructions as block row
 * index, block column index, from, to and increment.</p>
 *
 * @param key input key (not read by this method)
 * @param valueString input record; valueString has to be Text type
 * @param out output collector passed on to the special map operations
 * @param reporter reporter used for the map time counter
 * @throws IOException if the instruction type is unknown or the data generation fails
 */
@Override
public void map(Writable key, Writable valueString, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException {
    cachedReporter = reporter;
    long start = System.currentTimeMillis();
    // the record is the same for every instruction, so split it only once
    String[] params = valueString.toString().split(",");
    // for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        DataGenMRInstruction genInst = dataGen_instructions.get(i);
        if (genInst.getDataGenMethod() == DataGenMethod.RAND) {
            RandInstruction randInst = (RandInstruction) genInst;
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            int blockRowSize = Integer.parseInt(params[2]);
            int blockColSize = Integer.parseInt(params[3]);
            long seed = Long.parseLong(params[4]);
            double minValue = randInst.getMinValue();
            double maxValue = randInst.getMaxValue();
            double sparsity = randInst.getSparsity();
            // locale-independent lower-casing; the default locale may map 'I' to a non-ASCII character
            String pdf = randInst.getProbabilityDensityFunction().toLowerCase(java.util.Locale.ROOT);
            // rand data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, blockRowSize, blockColSize, blockRowSize, blockColSize, sparsity, minValue, maxValue, randInst.getPdfParams());
                block[i].randOperationsInPlace(rgen, null, seed);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else if (genInst.getDataGenMethod() == DataGenMethod.SEQ) {
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            double from = Double.parseDouble(params[2]);
            double to = Double.parseDouble(params[3]);
            double incr = Double.parseDouble(params[4]);
            // handle default 1 to -1 for special case of from>to
            incr = LibMatrixDatagen.updateSeqIncr(from, to, incr);
            // sequence data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                block[i].seqOperationsInPlace(from, to, incr);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else {
            throw new IOException("Unknown data generation instruction: " + genInst.toString());
        }
        // put the input in the cache
        cachedValues.reset();
        cachedValues.set(genInst.output, indexes[i], block[i]);
        // special operations for individual map type
        specialOperationsForActualMap(i, out, reporter);
    }
    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.
the class MRJobConfiguration method computeMatrixCharacteristics.
/**
 * Propagates matrix characteristics through all instructions of a job,
 * registers the characteristics of intermediates, map outputs and results
 * in the job configuration, and computes the number of reducer groups.
 *
 * NOTE: this method needs to be in-sync with MRBaseForCommonInstructions.processOneInstruction,
 * otherwise, the latter will potentially fail with missing dimension information.
 *
 * @param job job configuration
 * @param inputIndexes array of byte indexes
 * @param dataGenInstructions data gen instructions as a string
 * @param instructionsInMapper instruction in mapper as a string
 * @param reblockInstructions reblock instructions as a string
 * @param aggInstructionsInReducer aggregate instructions in reducer as a string
 * @param aggBinInstructions binary aggregate instructions as a string (not read by this method; the aggregate binary instructions are taken from the job configuration)
 * @param otherInstructionsInReducer other instructions in reducer as a string
 * @param resultIndexes array of byte result indexes
 * @param mapOutputIndexes set of map output indexes
 * @param forMMCJ if true, the number of reducer groups is derived from the first input of the aggregate binary instruction instead of from the map outputs
 * @return result matrix characteristics together with the number of reducer groups
 */
public static MatrixChar_N_ReducerGroups computeMatrixCharacteristics(JobConf job, byte[] inputIndexes, String dataGenInstructions, String instructionsInMapper, String reblockInstructions, String aggInstructionsInReducer, String aggBinInstructions, String otherInstructionsInReducer, byte[] resultIndexes, HashSet<Byte> mapOutputIndexes, boolean forMMCJ) {
    HashSet<Byte> intermediates = new HashSet<>();
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<>();

    // start from the characteristics of the job inputs
    for (byte inIx : inputIndexes) {
        dims.put(inIx, new MatrixCharacteristics(getNumRows(job, inIx), getNumColumns(job, inIx), getNumRowsPerBlock(job, inIx), getNumColumnsPerBlock(job, inIx), getNumNonZero(job, inIx)));
    }

    // data generation instructions
    DataGenMRInstruction[] genIns = MRInstructionParser.parseDataGenInstructions(dataGenInstructions);
    if (genIns != null) {
        for (DataGenMRInstruction gen : genIns) {
            MatrixCharacteristics.computeDimension(dims, gen);
        }
    }

    // mapper instructions: selected instruction types register one of their inputs as intermediate
    MRInstruction[] mapperIns = MRInstructionParser.parseMixedInstructions(instructionsInMapper);
    if (mapperIns != null) {
        for (MRInstruction mapIns : mapperIns) {
            MatrixCharacteristics.computeDimension(dims, mapIns);
            // index that gets registered, and index whose dimensions are registered for it
            byte target;
            byte source;
            if (mapIns instanceof UnaryMRInstructionBase) {
                target = ((UnaryMRInstructionBase) mapIns).input;
                source = target;
            } else if (mapIns instanceof AppendMInstruction) {
                target = ((AppendMInstruction) mapIns).input1;
                source = target;
            } else if (mapIns instanceof AppendGInstruction) {
                target = ((AppendGInstruction) mapIns).input1;
                source = target;
            } else if (mapIns instanceof BinaryMInstruction) {
                target = ((BinaryMInstruction) mapIns).input1;
                source = target;
            } else if (mapIns instanceof AggregateBinaryInstruction) {
                // TODO
                target = ((AggregateBinaryInstruction) mapIns).input1;
                source = target;
            } else if (mapIns instanceof MapMultChainInstruction) {
                // NOTE(review): unlike every other branch, input1 is registered with the
                // dimensions of input2 - confirm this is intended
                MapMultChainInstruction chain = (MapMultChainInstruction) mapIns;
                target = chain.getInput1();
                source = chain.getInput2();
            } else if (mapIns instanceof PMMJMRInstruction) {
                target = ((PMMJMRInstruction) mapIns).input2;
                source = target;
            } else {
                continue;
            }
            setIntermediateMatrixCharactristics(job, target, dims.get(source));
            intermediates.add(target);
        }
    }

    // reblock instructions
    ReblockInstruction[] reblocks = MRInstructionParser.parseReblockInstructions(reblockInstructions);
    if (reblocks != null) {
        for (ReblockInstruction rblk : reblocks) {
            MatrixCharacteristics.computeDimension(dims, rblk);
            setMatrixCharactristicsForReblock(job, rblk.output, dims.get(rblk.output));
        }
    }

    // aggregate instructions in the reducer
    Instruction[] aggregates = MRInstructionParser.parseAggregateInstructions(aggInstructionsInReducer);
    if (aggregates != null) {
        for (Instruction agg : aggregates) {
            MRInstruction aggIns = (MRInstruction) agg;
            MatrixCharacteristics.computeDimension(dims, aggIns);
            // outputs that are not job results have their dimensions added to the jobconf
            boolean isResult = false;
            for (int r = 0; r < resultIndexes.length && !isResult; r++) {
                isResult = (resultIndexes[r] == aggIns.output);
            }
            if (!isResult) {
                setIntermediateMatrixCharactristics(job, aggIns.output, dims.get(aggIns.output));
                intermediates.add(aggIns.output);
            }
        }
    }

    // aggregate binary instructions, read from the job configuration
    long numReduceGroups = 0;
    AggregateBinaryInstruction[] aggBins = getAggregateBinaryInstructions(job);
    if (aggBins != null) {
        for (AggregateBinaryInstruction aggBin : aggBins) {
            MatrixCharacteristics left = dims.get(aggBin.input1);
            MatrixCharacteristics right = dims.get(aggBin.input2);
            setMatrixCharactristicsForBinAgg(job, aggBin.input1, left);
            setMatrixCharactristicsForBinAgg(job, aggBin.input2, right);
            MatrixCharacteristics.computeDimension(dims, aggBin);
            if (forMMCJ) {
                // there will be only one aggbin operation for MMCJ
                numReduceGroups = (long) Math.ceil((double) left.getCols() / (double) left.getColsPerBlock());
            }
        }
    }

    if (!forMMCJ) {
        // store the skylines: block-grid extents of map outputs not contained in another one
        ArrayList<Long> xs = new ArrayList<>(mapOutputIndexes.size());
        ArrayList<Long> ys = new ArrayList<>(mapOutputIndexes.size());
        for (byte outIx : mapOutputIndexes) {
            MatrixCharacteristics outDim = dims.get(outIx);
            long x = (long) Math.ceil((double) outDim.getRows() / (double) outDim.getRowsPerBlock());
            long y = (long) Math.ceil((double) outDim.getCols() / (double) outDim.getColsPerBlock());
            boolean included = false;
            int pos = 0;
            while (pos < xs.size()) {
                long otherX = xs.get(pos);
                long otherY = ys.get(pos);
                if ((x >= otherX && y > otherY) || (x > otherX && y >= otherY)) {
                    // remove any included x's and y's; the next candidate moves to the same position
                    xs.remove(pos);
                    ys.remove(pos);
                    continue;
                }
                if (x <= otherX && y <= otherY) {
                    // if included in others, stop
                    included = true;
                    break;
                }
                pos++;
            }
            if (!included) {
                xs.add(x);
                ys.add(y);
            }
        }
        // sort by x
        TreeMap<Long, Long> skyline = new TreeMap<>();
        for (int k = 0; k < xs.size(); k++) {
            skyline.put(xs.get(k), ys.get(k));
        }
        // compute area
        numReduceGroups = 0;
        long prevX = 0;
        for (Entry<Long, Long> step : skyline.entrySet()) {
            numReduceGroups += (step.getKey() - prevX) * step.getValue();
            prevX = step.getKey();
        }
    }

    // remaining reducer instructions
    MRInstruction[] reducerIns = MRInstructionParser.parseMixedInstructions(otherInstructionsInReducer);
    if (reducerIns != null) {
        for (MRInstruction redIns : reducerIns) {
            MatrixCharacteristics.computeDimension(dims, redIns);
            if (redIns instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase unary = (UnaryMRInstructionBase) redIns;
                setIntermediateMatrixCharactristics(job, unary.input, dims.get(unary.input));
                intermediates.add(unary.input);
            } else if (redIns instanceof RemoveEmptyMRInstruction) {
                RemoveEmptyMRInstruction rmEmpty = (RemoveEmptyMRInstruction) redIns;
                setIntermediateMatrixCharactristics(job, rmEmpty.input1, dims.get(rmEmpty.input1));
                intermediates.add(rmEmpty.input1);
            }
            // outputs that are not job results have their dimensions added to the jobconf
            boolean isResult = false;
            for (int r = 0; r < resultIndexes.length && !isResult; r++) {
                isResult = (resultIndexes[r] == redIns.output);
            }
            if (!isResult) {
                setIntermediateMatrixCharactristics(job, redIns.output, dims.get(redIns.output));
                intermediates.add(redIns.output);
            }
        }
    }

    setIntermediateMatrixIndexes(job, intermediates);
    for (byte tag : mapOutputIndexes) {
        setMatrixCharactristicsForMapperOutput(job, tag, dims.get(tag));
    }

    // collect and register the characteristics of the job results
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    for (int r = 0; r < resultIndexes.length; r++) {
        stats[r] = dims.get(resultIndexes[r]);
        setMatrixCharactristicsForOutput(job, resultIndexes[r], stats[r]);
    }
    return new MatrixChar_N_ReducerGroups(stats, numReduceGroups);
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project systemml by apache.
the class RunMRJobs method executeInMemoryDataGenOperations.
/**
 * Runs the given data generation instructions (rand and seq) directly in
 * memory and binds every generated block to each output matrix whose result
 * index equals the output index of the generating instruction.
 *
 * @param inst MR job instruction that provides the result indices and output infos
 * @param randInst data generation instructions as a string
 * @param outputMatrices output matrix objects, addressed by the same position as the result indices
 * @return job return with the characteristics of the generated matrices, always flagged as successful
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] characteristics = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] parsed = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIndices = inst.getIv_resultIndices();

    for (DataGenMRInstruction dgInst : parsed) {
        MatrixBlock generated;
        if (dgInst instanceof RandInstruction) {
            // CP Rand block operation
            RandInstruction rand = (RandInstruction) dgInst;
            RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
                rand.getProbabilityDensityFunction(),
                (int) rand.getRows(), (int) rand.getCols(),
                rand.getRowsInBlock(), rand.getColsInBlock(),
                rand.getSparsity(), rand.getMinValue(), rand.getMaxValue(),
                rand.getPdfParams());
            generated = MatrixBlock.randOperations(generator, rand.getSeed());
        } else if (dgInst instanceof SeqInstruction) {
            SeqInstruction seq = (SeqInstruction) dgInst;
            generated = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);
        } else {
            // other instruction types are ignored
            continue;
        }

        // hand the block over to every output that is fed by this instruction
        for (int pos = 0; pos < resultIndices.length; pos++) {
            if (dgInst.output != resultIndices[pos]) {
                continue;
            }
            outputMatrices[pos].acquireModify(generated);
            outputMatrices[pos].release();
            characteristics[pos] = new MatrixCharacteristics(generated.getNumRows(), generated.getNumColumns(), dgInst.getRowsInBlock(), dgInst.getColsInBlock(), generated.getNonZeros());
        }
    }
    return new JobReturn(characteristics, inst.getOutputInfos(), true);
}
Aggregations