use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class PmmSPInstruction method parseInstruction.
/**
 * Builds a {@link PmmSPInstruction} from its serialized instruction string.
 *
 * The instruction parts are read positionally: parts 1 and 2 are the two inputs,
 * part 3 is the {@code nrow} operand, part 4 is the output and part 5 is the
 * name of a {@code CacheType} constant. The operator is always a multiply
 * combined with a plus-aggregate (initial value 0).
 *
 * @param str serialized instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not {@code PMMJ.OPCODE}
 */
public static PmmSPInstruction parseInstruction(String str) {
    String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = InstructionUtils.getOpCode(str);

    // guard clause: reject anything but the PMMJ opcode before touching the operands
    if (!opcode.equalsIgnoreCase(PMMJ.OPCODE)) {
        throw new DMLRuntimeException("PmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // positional operands
    CPOperand leftInput = new CPOperand(fields[1]);
    CPOperand rightInput = new CPOperand(fields[2]);
    CPOperand numRows = new CPOperand(fields[3]);
    CPOperand result = new CPOperand(fields[4]);
    CacheType cacheType = CacheType.valueOf(fields[5]);

    // sum-of-products operator
    AggregateOperator plusAgg = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator multPlus = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), plusAgg);

    return new PmmSPInstruction(multPlus, leftInput, rightInput, result, numRows, cacheType, opcode, str);
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class QuantileSortSPInstruction method parseInstruction.
/**
 * Builds a {@link QuantileSortSPInstruction} from its serialized instruction string.
 *
 * Two layouts are accepted, distinguished by the number of instruction parts:
 * three parts (opcode, input, output) or four parts (opcode, input, weights, output).
 *
 * @param str serialized instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not {@code SortKeys.OPCODE} or the
 *         number of parts is neither 3 nor 4
 */
public static QuantileSortSPInstruction parseInstruction(String str) {
    // placeholder operands, filled in by parseUnaryInstruction below
    CPOperand in1 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
    CPOperand out = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);

    String[] tokens = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = tokens[0];

    if (!opcode.equalsIgnoreCase(SortKeys.OPCODE)) {
        throw new DMLRuntimeException("Unknown opcode while parsing a SortSPInstruction: " + str);
    }

    switch (tokens.length) {
        case 3: {
            // Example: sort:mVar1:mVar2 (input=mVar1, output=mVar2)
            parseUnaryInstruction(str, in1, out);
            return new QuantileSortSPInstruction(new SimpleOperator(null), in1, out, opcode, str);
        }
        case 4: {
            // Example: sort:mVar1:mVar2:mVar3 (input=mVar1, weights=mVar2, output=mVar3)
            CPOperand weights = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
            parseUnaryInstruction(str, in1, weights, out);
            return new QuantileSortSPInstruction(new SimpleOperator(null), in1, weights, out, opcode, str);
        }
        default:
            throw new DMLRuntimeException("Invalid number of operands in instruction: " + str);
    }
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class RandSPInstruction method generateRandData.
/**
 * Executes the rand variant of this instruction.
 *
 * Depending on {@code isMemAvail(...)} and the configured runtime platform, the
 * matrix is either generated directly as a single in-memory block, or generated
 * block-wise from an RDD holding one seed per block. The seed RDD is
 * parallelized from an in-memory list when the number of blocks is below
 * {@code INMEMORY_NUMBLOCKS_THRESHOLD}, and read back from a generated seed
 * file otherwise.
 *
 * @param sec spark execution context
 * @throws DMLRuntimeException if the seed file cannot be created or written
 */
private void generateRandData(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    long lcols = sec.getScalarInput(cols).getLongValue();

    // step 1: generate pseudo-random seed (because not specified)
    // seed per invocation
    long lSeed = seed;
    if (lSeed == DataGenOp.UNSPECIFIED_SEED) {
        lSeed = DataGenOp.generateRandomSeed();
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("Process RandSPInstruction rand with seed = " + lSeed + ".");
    }

    // step 2: potential in-memory rand operations if applicable
    if (isMemAvail(lrows, lcols, sparsity, minValue, maxValue) && DMLScript.rtplatform != RUNTIME_PLATFORM.SPARK) {
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        MatrixBlock mb = MatrixBlock.randOperations(rgen, lSeed);
        sec.setMatrixOutput(output.getName(), mb, getExtendedOpcode());
        Statistics.decrementNoOfExecutedSPInst();
        return;
    }

    // step 3: seed generation
    JavaPairRDD<MatrixIndexes, Long> seedsRDD = null;
    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(lSeed);
    // overestimate for on disk, ensures hdfs block per partition
    double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(lrows, lcols, rowsInBlock, colsInBlock, sparsity);
    double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
    MatrixCharacteristics tmp = new MatrixCharacteristics(lrows, lcols, rowsInBlock, colsInBlock);
    long numBlocks = tmp.getNumBlocks();
    long numColBlocks = tmp.getNumColBlocks();

    // for load balancing: degree of parallelism of roughly one HDFS block per
    // partition, bounded by the number of blocks and never below 1
    // (computed once; both seed-construction paths use the same value)
    int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);

    if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
        // a) in-memory seed rdd construction
        ArrayList<Tuple2<MatrixIndexes, Long>> seeds = new ArrayList<>();
        for (long i = 0; i < numBlocks; i++) {
            // 1-based block row/column index derived from the linear block id
            long r = 1 + i / numColBlocks;
            long c = 1 + i % numColBlocks;
            MatrixIndexes indx = new MatrixIndexes(r, c);
            Long seedForBlock = bigrand.nextLong();
            seeds.add(new Tuple2<>(indx, seedForBlock));
        }
        // create seeds rdd
        seedsRDD = sec.getSparkContext().parallelizePairs(seeds, numPartitions);
    } else {
        // b) file-based seed rdd construction (for robustness wrt large number of blocks)
        Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            try (PrintWriter pw = new PrintWriter(fs.create(path))) {
                StringBuilder sb = new StringBuilder();
                for (long i = 0; i < numBlocks; i++) {
                    // one line per block: "<rowIndex>,<colIndex>,<seed>"
                    sb.append(1 + i / numColBlocks);
                    sb.append(',');
                    sb.append(1 + i % numColBlocks);
                    sb.append(',');
                    sb.append(bigrand.nextLong());
                    pw.println(sb.toString());
                    sb.setLength(0);
                }
                // PrintWriter never throws on write; it only records an error flag.
                // Surface a failed write here instead of silently reading back a
                // truncated seed file.
                if (pw.checkError()) {
                    throw new IOException("Failed to write rand seeds to " + path);
                }
            }
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        }
        // create seeds rdd
        seedsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).mapToPair(new ExtractSeedTuple());
    }

    // step 4: execute rand instruction over seed input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = seedsRDD.mapToPair(new GenerateRandomBlock(lrows, lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdf, pdfParams));

    // step 5: output handling
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown(true)) {
        // note: we cannot compute the nnz from sparsity because this would not reflect the
        // actual number of non-zeros, except for extreme values of sparsity equals 0 or 1.
        long lnnz = (sparsity == 0 || sparsity == 1) ? (long) (sparsity * lrows * lcols) : -1;
        mcOut.set(lrows, lcols, rowsInBlock, colsInBlock, lnnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class RandSPInstruction method parseInstruction.
/**
 * Builds a {@link RandSPInstruction} from its serialized instruction string.
 *
 * The opcode (part 0) selects the data generation method (rand, seq or sample)
 * and with it the expected number of fields; the output operand is always the
 * last part. Literal fields that still contain
 * {@code Lop.VARIABLE_NAME_PLACEHOLDER} are replaced by a fallback value
 * instead of being parsed.
 *
 * @param str serialized instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode maps to no known data generation method
 */
public static RandSPInstruction parseInstruction(String str) {
    final String placeholder = Lop.VARIABLE_NAME_PLACEHOLDER;
    String[] s = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = s[0];

    // map opcode to generation method and validate the field count
    DataGenMethod method = DataGenMethod.INVALID;
    if (opcode.equalsIgnoreCase(DataGen.RAND_OPCODE)) {
        method = DataGenMethod.RAND;
        InstructionUtils.checkNumFields(str, 12);
    } else if (opcode.equalsIgnoreCase(DataGen.SEQ_OPCODE)) {
        method = DataGenMethod.SEQ;
        // 8 operands: rows, cols, rpb, cpb, from, to, incr, outvar
        InstructionUtils.checkNumFields(str, 8);
    } else if (opcode.equalsIgnoreCase(DataGen.SAMPLE_OPCODE)) {
        method = DataGenMethod.SAMPLE;
        // 7 operands: range, size, replace, seed, rpb, cpb, outvar
        InstructionUtils.checkNumFields(str, 7);
    }

    // no operator is attached to data generation instructions
    Operator op = null;
    // output is specified by the last operand
    CPOperand out = new CPOperand(s[s.length - 1]);

    if (method == DataGenMethod.RAND) {
        CPOperand rows = new CPOperand(s[1]);
        CPOperand cols = new CPOperand(s[2]);
        int rpb = Integer.parseInt(s[3]);
        int cpb = Integer.parseInt(s[4]);
        // unresolved placeholders fall back to -1 (null for pdfParams)
        double minValue = s[5].contains(placeholder) ? -1 : Double.parseDouble(s[5]);
        double maxValue = s[6].contains(placeholder) ? -1 : Double.parseDouble(s[6]);
        double sparsity = s[7].contains(placeholder) ? -1 : Double.parseDouble(s[7]);
        long seed = s[8].contains(placeholder) ? -1 : Long.parseLong(s[8]);
        String dir = s[9];
        String pdf = s[10];
        String pdfParams = s[11].contains(placeholder) ? null : s[11];
        return new RandSPInstruction(op, method, null, out, rows, cols, rpb, cpb, minValue, maxValue, sparsity, seed, dir, pdf, pdfParams, opcode, str);
    }

    if (method == DataGenMethod.SEQ) {
        int rpb = Integer.parseInt(s[3]);
        int cpb = Integer.parseInt(s[4]);
        CPOperand from = new CPOperand(s[5]);
        CPOperand to = new CPOperand(s[6]);
        CPOperand incr = new CPOperand(s[7]);
        // seq has no input operand
        CPOperand noInput = null;
        return new RandSPInstruction(op, method, noInput, out, null, null, rpb, cpb, from, to, incr, opcode, str);
    }

    if (method == DataGenMethod.SAMPLE) {
        double max = s[1].contains(placeholder) ? 0 : Double.parseDouble(s[1]);
        CPOperand rows = new CPOperand(s[2]);
        // a sample is always a single column
        CPOperand cols = new CPOperand("1", ValueType.INT, DataType.SCALAR);
        boolean replace = !s[3].contains(placeholder) && Boolean.parseBoolean(s[3]);
        long seed = Long.parseLong(s[4]);
        int rpb = Integer.parseInt(s[5]);
        int cpb = Integer.parseInt(s[6]);
        return new RandSPInstruction(op, method, null, out, rows, cols, rpb, cpb, max, replace, seed, opcode, str);
    }

    throw new DMLRuntimeException("Unrecognized data generation method: " + method);
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class RandSPInstruction method generateSample.
/**
 * Helper function to construct a sample.
 *
 * The population range {@code [1, maxValue]} is split into contiguous chunks of
 * {@code partitionSize} values, one {@code SampleTask} (with its own seed) per
 * chunk. The sampled values are then shuffled by sorting on an attached random
 * key, trimmed to the requested number of rows and converted into a
 * single-column binary-block matrix.
 *
 * @param sec spark execution context
 * @throws DMLRuntimeException if the requested sample size exceeds the population
 *         size and sampling is without replacement
 */
private void generateSample(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    if (maxValue < lrows && !replace) {
        // report the resolved sample size (lrows), not the operand object
        throw new DMLRuntimeException("Sample (size=" + lrows + ") larger than population (size=" + maxValue + ") can only be generated with replacement.");
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("Process RandSPInstruction sample with range=" + maxValue + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
    }

    // sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time.
    double fraction = SamplingUtils.computeFractionForSampleSize((int) lrows, UtilFunctions.toLong(maxValue), replace);
    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(seed);

    // divide the population range across numPartitions by creating SampleTasks
    double hdfsBlockSize = InfrastructureAnalyzer.getHDFSBlockSize();
    long outputSize = MatrixBlock.estimateSizeDenseInMemory(lrows, 1);
    // at least one partition, so partitionSize is never derived from a division by
    // zero and parallelize is never asked for zero slices
    int numPartitions = (int) Math.max(Math.ceil((double) outputSize / hdfsBlockSize), 1);
    long partitionSize = (long) Math.ceil(maxValue / numPartitions);

    ArrayList<SampleTask> offsets = new ArrayList<>();
    long st = 1;
    while (st <= maxValue) {
        // each task covers the range starting at st and gets its own seed
        SampleTask s = new SampleTask();
        s.range_start = st;
        s.seed = bigrand.nextLong();
        offsets.add(s);
        st = st + partitionSize;
    }
    JavaRDD<SampleTask> offsetRDD = sec.getSparkContext().parallelize(offsets, numPartitions);

    // Construct the sample in a distributed manner
    JavaRDD<Double> rdd = offsetRDD.flatMap((new GenerateSampleBlock(replace, fraction, (long) maxValue, partitionSize)));

    // Randomize the sampled elements
    JavaRDD<Double> randomizedRDD = rdd.mapToPair(new AttachRandom()).sortByKey().values();

    // Trim the sampled list to required size & attach matrix indexes to randomized elements
    JavaPairRDD<MatrixIndexes, MatrixCell> miRDD = randomizedRDD.zipWithIndex().filter(new TrimSample(lrows)).mapToPair(new Double2MatrixCell());
    MatrixCharacteristics mcOut = new MatrixCharacteristics(lrows, 1, rowsInBlock, colsInBlock, lrows);

    // Construct BinaryBlock representation
    JavaPairRDD<MatrixIndexes, MatrixBlock> mbRDD = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), miRDD, mcOut, true);
    sec.getMatrixCharacteristics(output.getName()).setNonZeros(lrows);
    sec.setRDDHandleForVariable(output.getName(), mbRDD);
}
Aggregations