use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class ReblockSPInstruction method processMatrixReblockInstruction.
@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) throws DMLRuntimeException {
MatrixObject mo = sec.getMatrixObject(input1.getName());
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
//check jdk version (prevent double.parseDouble contention on <jdk8)
sec.checkAndRaiseValidationWarningJDKVersion();
//get the input textcell rdd
JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
//convert textcell to binary block
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else if (iinfo == InputInfo.CSVInputInfo) {
// HACK ALERT: Until we introduces the rewrite to insert csvrblock for non-persistent read
// throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
CSVReblockSPInstruction csvInstruction = null;
boolean hasHeader = false;
String delim = ",";
boolean fill = false;
double fillValue = 0;
if (mo.getFileFormatProperties() instanceof CSVFileFormatProperties && mo.getFileFormatProperties() != null) {
CSVFileFormatProperties props = (CSVFileFormatProperties) mo.getFileFormatProperties();
hasHeader = props.hasHeader();
delim = props.getDelim();
fill = props.isFill();
fillValue = props.getFillValue();
}
csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
csvInstruction.processInstruction(sec);
return;
} else if (iinfo == InputInfo.BinaryCellInputInfo) {
JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else if (iinfo == InputInfo.BinaryBlockInputInfo) {
//BINARY BLOCK <- BINARY BLOCK (different sizes)
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
out = RDDAggregateUtils.mergeByKey(out, false);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else {
throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
}
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class ReblockSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
SparkExecutionContext sec = (SparkExecutionContext) ec;
//set the output characteristics
CacheableData<?> obj = sec.getCacheableData(input1.getName());
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
mcOut.set(mc.getRows(), mc.getCols(), brlen, bclen, mc.getNonZeros());
//get the source format form the meta data
MatrixFormatMetaData iimd = (MatrixFormatMetaData) obj.getMetaData();
if (iimd == null)
throw new DMLRuntimeException("Error: Metadata not found");
InputInfo iinfo = iimd.getInputInfo();
//check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
if (Recompiler.checkCPReblock(sec, input1.getName())) {
if (input1.getDataType() == DataType.MATRIX)
Recompiler.executeInMemoryMatrixReblock(sec, input1.getName(), output.getName());
else if (input1.getDataType() == DataType.FRAME)
Recompiler.executeInMemoryFrameReblock(sec, input1.getName(), output.getName());
return;
}
//execute matrix/frame reblock
if (input1.getDataType() == DataType.MATRIX)
processMatrixReblockInstruction(sec, iinfo);
else if (input1.getDataType() == DataType.FRAME)
processFrameReblockInstruction(sec, iinfo);
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class ReorgSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
SparkExecutionContext sec = (SparkExecutionContext) ec;
String opcode = getOpcode();
//get input rdd handle
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
if (//TRANSPOSE
opcode.equalsIgnoreCase("r'")) {
//execute transpose reorg operation
out = in1.mapToPair(new ReorgMapFunction(opcode));
} else if (//REVERSE
opcode.equalsIgnoreCase("rev")) {
//execute reverse reorg operation
out = in1.flatMapToPair(new RDDRevFunction(mcIn));
if (mcIn.getRows() % mcIn.getRowsPerBlock() != 0)
out = RDDAggregateUtils.mergeByKey(out, false);
} else if (// DIAG
opcode.equalsIgnoreCase("rdiag")) {
if (mcIn.getCols() == 1) {
// diagV2M
out = in1.flatMapToPair(new RDDDiagV2MFunction(mcIn));
} else {
// diagM2V
//execute diagM2V operation
out = in1.filter(new FilterDiagBlocksFunction()).mapToPair(new ReorgMapFunction(opcode));
}
} else if (//ORDER
opcode.equalsIgnoreCase("rsort")) {
// Sort by column 'col' in ascending/descending order and return either index/value
//get parameters
long col = ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue();
boolean desc = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
boolean ixret = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
boolean singleCol = (mcIn.getCols() == 1);
// extract column (if necessary) and sort
out = in1;
if (!singleCol) {
out = out.filter(new IsBlockInRange(1, mcIn.getRows(), col, col, mcIn)).mapValues(new ExtractColumn((int) UtilFunctions.computeCellInBlock(col, mcIn.getColsPerBlock())));
}
//actual index/data sort operation
if (ixret) {
//sort indexes
out = RDDSortUtils.sortIndexesByVal(out, !desc, mcIn.getRows(), mcIn.getRowsPerBlock());
} else if (singleCol && !desc) {
//sort single-column matrix
out = RDDSortUtils.sortByVal(out, mcIn.getRows(), mcIn.getRowsPerBlock());
} else {
//sort multi-column matrix
if (!_bSortIndInMem)
out = RDDSortUtils.sortDataByVal(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
else
out = RDDSortUtils.sortDataByValMemSort(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), sec, (ReorgOperator) _optr);
}
} else {
throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + opcode);
}
//store output rdd handle
updateReorgMatrixCharacteristics(sec);
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class RandSPInstruction method generateSequence.
private void generateSequence(SparkExecutionContext sec) throws DMLRuntimeException {
//sanity check valid increment
if (seq_incr == 0) {
throw new DMLRuntimeException("ERROR: While performing seq(" + seq_from + "," + seq_to + "," + seq_incr + ")");
}
//handle default 1 to -1 for special case of from>to
seq_incr = LibMatrixDatagen.updateSeqIncr(seq_from, seq_to, seq_incr);
if (LOG.isTraceEnabled())
LOG.trace("Process RandSPInstruction seq with seqFrom=" + seq_from + ", seqTo=" + seq_to + ", seqIncr" + seq_incr);
//step 1: offset generation
JavaRDD<Double> offsetsRDD = null;
long nnz = (long) Math.abs(Math.round((seq_to - seq_from) / seq_incr)) + 1;
double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(nnz, 1, rowsInBlock, colsInBlock, //overestimate for on disk, ensures hdfs block per partition
nnz);
double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
long numBlocks = (long) Math.ceil(((double) nnz) / rowsInBlock);
//a) in-memory offset rdd construction
if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
ArrayList<Double> offsets = new ArrayList<Double>();
for (long i = 0; i < numBlocks; i++) {
double off = seq_from + seq_incr * i * rowsInBlock;
offsets.add(off);
}
//for load balancing: degree of parallelism such that ~128MB per partition
int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
//create offset rdd
offsetsRDD = sec.getSparkContext().parallelize(offsets, numPartitions);
} else //b) file-based offset rdd construction (for robustness wrt large number of blocks)
{
Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
PrintWriter pw = null;
try {
FileSystem fs = IOUtilFunctions.getFileSystem(path);
pw = new PrintWriter(fs.create(path));
for (long i = 0; i < numBlocks; i++) {
double off = seq_from + seq_incr * i * rowsInBlock;
pw.println(off);
}
} catch (IOException ex) {
throw new DMLRuntimeException(ex);
} finally {
IOUtilFunctions.closeSilently(pw);
}
//for load balancing: degree of parallelism such that ~128MB per partition
int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
//create seeds rdd
offsetsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).map(new ExtractOffsetTuple());
}
//sanity check number of non-zeros
if (nnz != rows && rows != -1) {
throw new DMLRuntimeException("Incorrect number of non-zeros: " + nnz + " != " + rows);
}
//step 2: execute seq instruction over offset input
JavaPairRDD<MatrixIndexes, MatrixBlock> out = offsetsRDD.mapToPair(new GenerateSequenceBlock(rowsInBlock, seq_from, seq_to, seq_incr));
//step 3: output handling
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
if (!mcOut.dimsKnown()) {
mcOut.set(nnz, 1, rowsInBlock, colsInBlock, nnz);
}
sec.setRDDHandleForVariable(output.getName(), out);
}
use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
the class RandSPInstruction method generateRandData.
private void generateRandData(SparkExecutionContext sec) throws DMLRuntimeException {
//step 1: generate pseudo-random seed (because not specified)
//seed per invocation
long lSeed = seed;
if (lSeed == DataGenOp.UNSPECIFIED_SEED)
lSeed = DataGenOp.generateRandomSeed();
if (LOG.isTraceEnabled())
LOG.trace("Process RandSPInstruction rand with seed = " + lSeed + ".");
//step 2: potential in-memory rand operations if applicable
if (isMemAvail(rows, cols, sparsity, minValue, maxValue) && DMLScript.rtplatform != RUNTIME_PLATFORM.SPARK) {
RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) rows, (int) cols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
MatrixBlock mb = MatrixBlock.randOperations(rgen, lSeed);
sec.setMatrixOutput(output.getName(), mb);
Statistics.decrementNoOfExecutedSPInst();
return;
}
//step 3: seed generation
JavaPairRDD<MatrixIndexes, Tuple2<Long, Long>> seedsRDD = null;
Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(lSeed);
LongStream nnz = LibMatrixDatagen.computeNNZperBlock(rows, cols, rowsInBlock, colsInBlock, sparsity);
PrimitiveIterator.OfLong nnzIter = nnz.iterator();
double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(rows, cols, rowsInBlock, colsInBlock, //overestimate for on disk, ensures hdfs block per partition
rows * cols * sparsity);
double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
long numBlocks = new MatrixCharacteristics(rows, cols, rowsInBlock, colsInBlock).getNumBlocks();
long numColBlocks = (long) Math.ceil((double) cols / (double) colsInBlock);
//a) in-memory seed rdd construction
if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
ArrayList<Tuple2<MatrixIndexes, Tuple2<Long, Long>>> seeds = new ArrayList<Tuple2<MatrixIndexes, Tuple2<Long, Long>>>();
for (long i = 0; i < numBlocks; i++) {
long r = 1 + i / numColBlocks;
long c = 1 + i % numColBlocks;
MatrixIndexes indx = new MatrixIndexes(r, c);
Long seedForBlock = bigrand.nextLong();
seeds.add(new Tuple2<MatrixIndexes, Tuple2<Long, Long>>(indx, new Tuple2<Long, Long>(seedForBlock, nnzIter.nextLong())));
}
//for load balancing: degree of parallelism such that ~128MB per partition
int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
//create seeds rdd
seedsRDD = sec.getSparkContext().parallelizePairs(seeds, numPartitions);
} else //b) file-based seed rdd construction (for robustness wrt large number of blocks)
{
Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
PrintWriter pw = null;
try {
FileSystem fs = IOUtilFunctions.getFileSystem(path);
pw = new PrintWriter(fs.create(path));
StringBuilder sb = new StringBuilder();
for (long i = 0; i < numBlocks; i++) {
sb.append(1 + i / numColBlocks);
sb.append(',');
sb.append(1 + i % numColBlocks);
sb.append(',');
sb.append(bigrand.nextLong());
sb.append(',');
sb.append(nnzIter.nextLong());
pw.println(sb.toString());
sb.setLength(0);
}
} catch (IOException ex) {
throw new DMLRuntimeException(ex);
} finally {
IOUtilFunctions.closeSilently(pw);
}
//for load balancing: degree of parallelism such that ~128MB per partition
int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
//create seeds rdd
seedsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).mapToPair(new ExtractSeedTuple());
}
//step 4: execute rand instruction over seed input
JavaPairRDD<MatrixIndexes, MatrixBlock> out = seedsRDD.mapToPair(new GenerateRandomBlock(rows, cols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdf, pdfParams));
//step 5: output handling
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
if (!mcOut.dimsKnown(true)) {
//note: we cannot compute the nnz from sparsity because this would not reflect the
//actual number of non-zeros, except for extreme values of sparsity equals 0 or 1.
long lnnz = (sparsity == 0 || sparsity == 1) ? (long) (sparsity * rows * cols) : -1;
mcOut.set(rows, cols, rowsInBlock, colsInBlock, lnnz);
}
sec.setRDDHandleForVariable(output.getName(), out);
}
Aggregations