Use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by Apache.
Class RandSPInstruction, method generateRandData.
/**
 * Generates the output matrix of a rand instruction.
 * <p>
 * If {@code isMemAvail} accepts the requested dimensions and the runtime
 * platform is not SPARK, the matrix is generated as a single in-memory block.
 * Otherwise one seed per block is drawn from a generator initialized with the
 * instruction seed, the seeds are turned into an RDD (built in memory for a
 * small number of blocks, via a seed file otherwise), and every seed is mapped
 * to a random block.
 *
 * @param sec Spark execution context that provides the scalar inputs and receives the output
 * @throws DMLRuntimeException if writing the seed file fails with an I/O error
 */
private void generateRandData(SparkExecutionContext sec) {
    final long numRows = sec.getScalarInput(rows).getLongValue();
    final long numCols = sec.getScalarInput(cols).getLongValue();

    // step 1: resolve the seed; an unspecified seed is replaced by a fresh one per invocation
    final long effectiveSeed = (seed == DataGenOp.UNSPECIFIED_SEED) ? DataGenOp.generateRandomSeed() : seed;
    if (LOG.isTraceEnabled()) {
        LOG.trace("Process RandSPInstruction rand with seed = " + effectiveSeed + ".");
    }

    // step 2: generate the whole matrix in memory if applicable
    if (isMemAvail(numRows, numCols, sparsity, minValue, maxValue) && DMLScript.rtplatform != RUNTIME_PLATFORM.SPARK) {
        RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
            pdf, (int) numRows, (int) numCols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        MatrixBlock result = MatrixBlock.randOperations(generator, effectiveSeed);
        sec.setMatrixOutput(output.getName(), result, getExtendedOpcode());
        Statistics.decrementNoOfExecutedSPInst();
        return;
    }

    // step 3: per-block seed generation
    final Well1024a seedSource = LibMatrixDatagen.setupSeedsForRand(effectiveSeed);
    // overestimate for on disk, ensures hdfs block per partition
    final double estimatedSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(
        numRows, numCols, rowsInBlock, colsInBlock, sparsity);
    final double hdfsBlockSize = InfrastructureAnalyzer.getHDFSBlockSize();
    final MatrixCharacteristics dims = new MatrixCharacteristics(numRows, numCols, rowsInBlock, colsInBlock);
    final long blockCount = dims.getNumBlocks();
    final long colBlockCount = dims.getNumColBlocks();

    // for load balancing: degree of parallelism such that ~128MB per partition
    // (identical for both construction strategies below, hence computed once)
    final int numPartitions = (int) Math.max(Math.min(estimatedSize / hdfsBlockSize, blockCount), 1);

    final JavaPairRDD<MatrixIndexes, Long> seedsRDD;
    if (blockCount < INMEMORY_NUMBLOCKS_THRESHOLD) {
        // a) in-memory seed rdd construction
        ArrayList<Tuple2<MatrixIndexes, Long>> seedList = new ArrayList<>();
        for (long blk = 0; blk < blockCount; blk++) {
            // linear block position -> 1-based (row block, column block) index
            long rowIx = 1 + blk / colBlockCount;
            long colIx = 1 + blk % colBlockCount;
            seedList.add(new Tuple2<MatrixIndexes, Long>(new MatrixIndexes(rowIx, colIx), seedSource.nextLong()));
        }
        seedsRDD = sec.getSparkContext().parallelizePairs(seedList, numPartitions);
    } else {
        // b) file-based seed rdd construction (for robustness wrt large number of blocks)
        Path seedFile = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        PrintWriter writer = null;
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(seedFile);
            writer = new PrintWriter(fs.create(seedFile));
            for (long blk = 0; blk < blockCount; blk++) {
                // one line per block: rowIndex,colIndex,seed
                writer.println((1 + blk / colBlockCount) + "," + (1 + blk % colBlockCount) + "," + seedSource.nextLong());
            }
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            IOUtilFunctions.closeSilently(writer);
        }
        seedsRDD = sec.getSparkContext()
            .textFile(seedFile.toString(), numPartitions)
            .mapToPair(new ExtractSeedTuple());
    }

    // step 4: execute rand instruction over seed input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = seedsRDD.mapToPair(
        new GenerateRandomBlock(numRows, numCols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdf, pdfParams));

    // step 5: output handling
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown(true)) {
        // note: we cannot compute the nnz from sparsity because this would not reflect the
        // actual number of non-zeros, except for extreme values of sparsity equals 0 or 1.
        long nnz = -1;
        if (sparsity == 0 || sparsity == 1) {
            nnz = (long) (sparsity * numRows * numCols);
        }
        mcOut.set(numRows, numCols, rowsInBlock, colsInBlock, nnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
Use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by Apache.
Class DataGenCPInstruction, method processInstruction.
/**
 * Executes this data generation instruction and binds the generated block
 * to the output variable.
 * <p>
 * Depending on {@code method}, the block is produced by rand, seq, or sample.
 * For rand, a seed is drawn when none was specified, and that resolved seed
 * is both written to the trace log and used for generation.
 *
 * @param ec execution context that provides the scalar inputs and receives the output matrix
 * @throws DMLRuntimeException if a sample without replacement is requested that is larger
 *         than the population, or if {@code method} is none of RAND, SEQ, SAMPLE
 */
@Override
public void processInstruction(ExecutionContext ec) {
    MatrixBlock soresBlock = null;
    // process specific datagen operator
    if (method == DataGenMethod.RAND) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        long lcols = ec.getScalarInput(cols).getLongValue();
        // NOTE(review): presumably guards the int narrowing below - confirm
        checkValidDimensions(lrows, lcols);
        // generate pseudo-random seed (because not specified)
        // seed per invocation
        long lSeed = seed;
        if (lSeed == DataGenOp.UNSPECIFIED_SEED) {
            lSeed = DataGenOp.generateRandomSeed();
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction rand with seed = " + lSeed + ".");
        }
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        // pass the resolved seed (not the raw field) so the traced seed is the one actually used
        soresBlock = MatrixBlock.randOperations(rgen, lSeed, numThreads);
    } else if (method == DataGenMethod.SEQ) {
        double lfrom = ec.getScalarInput(seq_from).getDoubleValue();
        double lto = ec.getScalarInput(seq_to).getDoubleValue();
        double lincr = ec.getScalarInput(seq_incr).getDoubleValue();
        // handle default 1 to -1 for special case of from>to
        lincr = LibMatrixDatagen.updateSeqIncr(lfrom, lto, lincr);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction seq with seqFrom=" + lfrom + ", seqTo=" + lto + ", seqIncr=" + lincr);
        }
        soresBlock = MatrixBlock.seqOperations(lfrom, lto, lincr);
    } else if (method == DataGenMethod.SAMPLE) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        // the population size is taken from maxValue
        long range = UtilFunctions.toLong(maxValue);
        checkValidDimensions(lrows, 1);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction sample with range=" + range + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
        }
        if (range < lrows && !replace) {
            throw new DMLRuntimeException("Sample (size=" + lrows + ") larger than population (size=" + range + ") can only be generated with replacement.");
        }
        soresBlock = MatrixBlock.sampleOperations(range, (int) lrows, replace, seed);
    } else {
        // fail with a descriptive error instead of a NullPointerException further down
        throw new DMLRuntimeException("Unsupported data generation method: " + method);
    }
    // guarded sparse block representation change
    if (soresBlock.getInMemorySize() < OptimizerUtils.SAFE_REP_CHANGE_THRES) {
        soresBlock.examSparsity();
    }
    // release created output
    ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
}
Use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by Apache.
Class RunMRJobs, method executeInMemoryDataGenOperations.
/**
 * Executes the rand and seq instructions encoded in {@code randInst} directly
 * in memory and writes each generated block to every output matrix whose
 * result index equals the instruction's output index.
 * <p>
 * Instructions that are neither {@code RandInstruction} nor
 * {@code SeqInstruction} are skipped; output slots that no instruction
 * targets keep a {@code null} entry in the returned characteristics.
 *
 * @param inst           job instruction supplying the result indices and output infos
 * @param randInst       encoded data generation instructions to parse and execute
 * @param outputMatrices output matrix objects, positionally aligned with the result indices
 * @return job return holding the characteristics of the generated matrices
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] outChars = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] genInsts = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIndexes = inst.getIv_resultIndices();

    for (int k = 0; k < genInsts.length; k++) {
        DataGenMRInstruction current = genInsts[k];
        if (current instanceof RandInstruction) {
            // CP Rand block operation
            RandInstruction rand = (RandInstruction) current;
            RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
                rand.getProbabilityDensityFunction(), (int) rand.getRows(), (int) rand.getCols(),
                rand.getRowsInBlock(), rand.getColsInBlock(), rand.getSparsity(),
                rand.getMinValue(), rand.getMaxValue(), rand.getPdfParams());
            MatrixBlock generated = MatrixBlock.randOperations(generator, rand.getSeed());
            for (int pos = 0; pos < resultIndexes.length; pos++) {
                if (resultIndexes[pos] != rand.output) {
                    continue;
                }
                outputMatrices[pos].acquireModify(generated);
                outputMatrices[pos].release();
                outChars[pos] = new MatrixCharacteristics(generated.getNumRows(), generated.getNumColumns(),
                    rand.getRowsInBlock(), rand.getColsInBlock(), generated.getNonZeros());
            }
        } else if (current instanceof SeqInstruction) {
            SeqInstruction seq = (SeqInstruction) current;
            MatrixBlock generated = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);
            for (int pos = 0; pos < resultIndexes.length; pos++) {
                if (resultIndexes[pos] != seq.output) {
                    continue;
                }
                outputMatrices[pos].acquireModify(generated);
                outputMatrices[pos].release();
                outChars[pos] = new MatrixCharacteristics(generated.getNumRows(), generated.getNumColumns(),
                    seq.getRowsInBlock(), seq.getColsInBlock(), generated.getNonZeros());
            }
        }
    }
    return new JobReturn(outChars, inst.getOutputInfos(), true);
}
Use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by Apache.
Class DataGenMapper, method map.
/**
 * Generates one block per data generation instruction from a comma-separated
 * input record, caches it, and hands it to the map-specific post-processing.
 * <p>
 * For rand, the record is read as
 * {@code blockRow,blockCol,blockRowSize,blockColSize,seed}; for seq as
 * {@code blockRow,blockCol,from,to,incr}.
 *
 * @param key         input key (not read by this method)
 * @param valueString input record; valueString has to be Text type
 * @param out         output collector passed on to the map-specific operations
 * @param reporter    reporter used for the map-time counter
 * @throws IOException if block generation fails or the instruction's method is neither RAND nor SEQ
 */
@Override
public void map(Writable key, Writable valueString, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException {
    cachedReporter = reporter;
    final long startTime = System.currentTimeMillis();

    // for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        final DataGenMRInstruction genInst = dataGen_instructions.get(i);
        final DataGenMethod genMethod = genInst.getDataGenMethod();

        if (genMethod == DataGenMethod.RAND) {
            final RandInstruction rand = (RandInstruction) genInst;
            final String[] fields = valueString.toString().split(",");
            final long rowIx = Long.parseLong(fields[0]);
            final long colIx = Long.parseLong(fields[1]);
            final int numRows = Integer.parseInt(fields[2]);
            final int numCols = Integer.parseInt(fields[3]);
            final long blockSeed = Long.parseLong(fields[4]);
            final double lower = rand.getMinValue();
            final double upper = rand.getMaxValue();
            final double density = rand.getSparsity();
            final String pdfName = rand.getProbabilityDensityFunction().toLowerCase();

            // rand data generation
            try {
                indexes[i].setIndexes(rowIx, colIx);
                // the generator covers exactly this one block, so matrix size equals block size
                RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
                    pdfName, numRows, numCols, numRows, numCols, density, lower, upper, rand.getPdfParams());
                block[i].randOperationsInPlace(generator, null, blockSeed);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else if (genMethod == DataGenMethod.SEQ) {
            final String[] fields = valueString.toString().split(",");
            final long rowIx = Long.parseLong(fields[0]);
            final long colIx = Long.parseLong(fields[1]);
            final double seqFrom = Double.parseDouble(fields[2]);
            final double seqTo = Double.parseDouble(fields[3]);
            // handle default 1 to -1 for special case of from>to
            final double seqIncr = LibMatrixDatagen.updateSeqIncr(seqFrom, seqTo, Double.parseDouble(fields[4]));

            // sequence data generation
            try {
                indexes[i].setIndexes(rowIx, colIx);
                block[i].seqOperationsInPlace(seqFrom, seqTo, seqIncr);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else {
            throw new IOException("Unknown data generation instruction: " + genInst.toString());
        }

        // put the input in the cache
        cachedValues.reset();
        cachedValues.set(genInst.output, indexes[i], block[i]);

        // special operations for individual mapp type
        specialOperationsForActualMap(i, out, reporter);
    }

    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - startTime);
}
Use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project systemml by Apache.
Class DataGenCPInstruction, method processInstruction.
/**
 * Executes this data generation instruction and binds the generated block
 * to the output variable.
 * <p>
 * Depending on {@code method}, the block is produced by rand, seq, or sample.
 * For rand, a seed is drawn when none was specified, and that resolved seed
 * is both written to the trace log and used for generation.
 *
 * @param ec execution context that provides the scalar inputs and receives the output matrix
 * @throws DMLRuntimeException if a sample without replacement is requested that is larger
 *         than the population, or if {@code method} is none of RAND, SEQ, SAMPLE
 */
@Override
public void processInstruction(ExecutionContext ec) {
    MatrixBlock soresBlock = null;
    // process specific datagen operator
    if (method == DataGenMethod.RAND) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        long lcols = ec.getScalarInput(cols).getLongValue();
        // NOTE(review): presumably guards the int narrowing below - confirm
        checkValidDimensions(lrows, lcols);
        // generate pseudo-random seed (because not specified)
        // seed per invocation
        long lSeed = seed;
        if (lSeed == DataGenOp.UNSPECIFIED_SEED) {
            lSeed = DataGenOp.generateRandomSeed();
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction rand with seed = " + lSeed + ".");
        }
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        // pass the resolved seed (not the raw field) so the traced seed is the one actually used
        soresBlock = MatrixBlock.randOperations(rgen, lSeed, numThreads);
    } else if (method == DataGenMethod.SEQ) {
        double lfrom = ec.getScalarInput(seq_from).getDoubleValue();
        double lto = ec.getScalarInput(seq_to).getDoubleValue();
        double lincr = ec.getScalarInput(seq_incr).getDoubleValue();
        // handle default 1 to -1 for special case of from>to
        lincr = LibMatrixDatagen.updateSeqIncr(lfrom, lto, lincr);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction seq with seqFrom=" + lfrom + ", seqTo=" + lto + ", seqIncr=" + lincr);
        }
        soresBlock = MatrixBlock.seqOperations(lfrom, lto, lincr);
    } else if (method == DataGenMethod.SAMPLE) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        // the population size is taken from maxValue
        long range = UtilFunctions.toLong(maxValue);
        checkValidDimensions(lrows, 1);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction sample with range=" + range + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
        }
        if (range < lrows && !replace) {
            throw new DMLRuntimeException("Sample (size=" + lrows + ") larger than population (size=" + range + ") can only be generated with replacement.");
        }
        soresBlock = MatrixBlock.sampleOperations(range, (int) lrows, replace, seed);
    } else {
        // fail with a descriptive error instead of a NullPointerException further down
        throw new DMLRuntimeException("Unsupported data generation method: " + method);
    }
    // guarded sparse block representation change
    if (soresBlock.getInMemorySize() < OptimizerUtils.SAFE_REP_CHANGE_THRES) {
        soresBlock.examSparsity();
    }
    // release created output
    ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
}
Aggregations