Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.
Class RandSPInstruction, method generateSequence.
/**
 * Executes a {@code seq(from, to, incr)} instruction on Spark and registers the
 * resulting blocked RDD under the output variable.
 *
 * <p>One starting offset is generated per row block of the output. For fewer than
 * {@code INMEMORY_NUMBLOCKS_THRESHOLD} blocks the offsets are parallelized from an
 * in-memory list; otherwise they are written to a file and read back as a text RDD.
 * Each offset is then mapped to a matrix block via {@code GenerateSequenceBlock}.
 *
 * @param sec spark execution context providing the scalar inputs and output metadata
 * @throws DMLRuntimeException if the increment is zero or the offset file cannot be written
 */
private void generateSequence(SparkExecutionContext sec) {
    double lfrom = sec.getScalarInput(seq_from).getDoubleValue();
    double lto = sec.getScalarInput(seq_to).getDoubleValue();
    double lincr = sec.getScalarInput(seq_incr).getDoubleValue();

    // sanity check valid increment
    if (lincr == 0) {
        throw new DMLRuntimeException("ERROR: While performing seq(" + lfrom + "," + lto + "," + lincr + "): increment must not be zero");
    }

    // handle default 1 to -1 for special case of from>to
    lincr = LibMatrixDatagen.updateSeqIncr(lfrom, lto, lincr);
    if (LOG.isTraceEnabled())
        LOG.trace("Process RandSPInstruction seq with seqFrom=" + lfrom + ", seqTo=" + lto + ", seqIncr=" + lincr);

    // step 1: offset generation
    JavaRDD<Double> offsetsRDD = null;
    long nnz = UtilFunctions.getSeqLength(lfrom, lto, lincr);
    // overestimate for on disk, ensures hdfs block per partition
    double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(nnz, 1, rowsInBlock, colsInBlock, nnz);
    double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
    long numBlocks = (long) Math.ceil(((double) nnz) / rowsInBlock);

    // for load balancing: degree of parallelism such that roughly one hdfs block
    // per partition, bounded by the number of blocks and at least 1
    int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);

    if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
        // a) in-memory offset rdd construction
        ArrayList<Double> offsets = new ArrayList<>();
        for (long i = 0; i < numBlocks; i++) {
            // first value of the i-th row block
            offsets.add(lfrom + lincr * i * rowsInBlock);
        }
        offsetsRDD = sec.getSparkContext().parallelize(offsets, numPartitions);
    } else {
        // b) file-based offset rdd construction (for robustness wrt large number of blocks)
        Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        PrintWriter pw = null;
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            pw = new PrintWriter(fs.create(path));
            for (long i = 0; i < numBlocks; i++) {
                // first value of the i-th row block
                pw.println(lfrom + lincr * i * rowsInBlock);
            }
            // PrintWriter never throws on write/flush/close failures; close explicitly
            // and query the error flag so that a truncated offset file is not used
            pw.close();
            if (pw.checkError())
                throw new IOException("Failed to write seq offsets to " + path);
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            IOUtilFunctions.closeSilently(pw);
        }
        offsetsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).map(new ExtractOffsetTuple());
    }

    // step 2: execute seq instruction over offset input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = offsetsRDD.mapToPair(new GenerateSequenceBlock(rowsInBlock, lfrom, lto, lincr));

    // step 3: output handling (sequence is a single column with nnz rows)
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown()) {
        mcOut.set(nnz, 1, rowsInBlock, colsInBlock, nnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.
Class ReblockSPInstruction, method processInstruction.
/**
 * Reblocks the input matrix or frame into the output variable.
 *
 * <p>The output characteristics are set from the input dimensions and non-zeros
 * with the block sizes {@code brlen} x {@code bclen}. If the recompiler reports
 * that an in-memory reblock is applicable, that path is taken and the method
 * returns; otherwise the matrix or frame reblock is executed over the input.
 *
 * @param ec execution context, cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the input carries no format meta data
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    final String inName = input1.getName();

    // set the output characteristics from the input, using the target block sizes
    CacheableData<?> data = sparkCtx.getCacheableData(inName);
    MatrixCharacteristics inChars = sparkCtx.getMatrixCharacteristics(inName);
    MatrixCharacteristics outChars = sparkCtx.getMatrixCharacteristics(output.getName());
    outChars.set(inChars.getRows(), inChars.getCols(), brlen, bclen, inChars.getNonZeros());

    // the source format is taken from the input's meta data
    MetaDataFormat meta = (MetaDataFormat) data.getMetaData();
    if (meta == null) {
        throw new DMLRuntimeException("Error: Metadata not found");
    }
    InputInfo sourceFormat = meta.getInputInfo();

    boolean isMatrix = (input1.getDataType() == DataType.MATRIX);
    boolean isFrame = (input1.getDataType() == DataType.FRAME);

    // in-memory reblock (w/ lazy spark context, potential for latency reduction)
    if (Recompiler.checkCPReblock(sparkCtx, inName)) {
        if (isMatrix) {
            Recompiler.executeInMemoryMatrixReblock(sparkCtx, inName, output.getName());
        } else if (isFrame) {
            Recompiler.executeInMemoryFrameReblock(sparkCtx, inName, output.getName());
        }
        return;
    }

    // distributed matrix/frame reblock
    if (isMatrix) {
        processMatrixReblockInstruction(sparkCtx, sourceFormat);
    } else if (isFrame) {
        processFrameReblockInstruction(sparkCtx, sourceFormat);
    }
}
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.
Class ReblockSPInstruction, method processMatrixReblockInstruction.
/**
 * Converts the input matrix from its source format into a binary-block RDD and
 * registers that RDD (plus its lineage to the input) under the output variable.
 *
 * <p>Text cell / matrix market and binary cell inputs are converted via
 * {@code RDDConverterUtils}; binary block inputs are re-split into the output
 * block size; CSV inputs are delegated to a {@code CSVReblockSPInstruction},
 * which handles the output itself.
 *
 * @param sec   spark execution context holding input and output variables
 * @param iinfo input format of the source matrix
 * @throws DMLRuntimeException if the input format is not supported
 */
@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) {
    MatrixObject mo = sec.getMatrixObject(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());

    JavaPairRDD<MatrixIndexes, MatrixBlock> out;
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        // get the input textcell rdd and convert textcell to binary block
        JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        out = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
    } else if (iinfo == InputInfo.CSVInputInfo) {
        // HACK ALERT: until a rewrite inserts csvrblock for non-persistent reads,
        // CSV inputs are handled here by delegating to a csv reblock instruction
        boolean hasHeader = false;
        String delim = ",";
        boolean fill = false;
        double fillValue = 0;
        // instanceof is false for null, so no separate null check is required
        Object fmtProps = mo.getFileFormatProperties();
        if (fmtProps instanceof CSVFileFormatProperties) {
            CSVFileFormatProperties props = (CSVFileFormatProperties) fmtProps;
            hasHeader = props.hasHeader();
            delim = props.getDelim();
            fill = props.isFill();
            fillValue = props.getFillValue();
        }
        CSVReblockSPInstruction csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
        csvInstruction.processInstruction(sec);
        // no output registration in this method for the csv path
        return;
    } else if (iinfo == InputInfo.BinaryCellInputInfo) {
        JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
    } else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        // BINARY BLOCK <- BINARY BLOCK (different sizes)
        JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
        // the merge is skipped if, per dimension, the matrix is smaller than one
        // output block or the input block size is a multiple of the output block size
        boolean shuffleFreeReblock = mc.dimsKnown() && mcOut.dimsKnown()
            && (mc.getRows() < mcOut.getRowsPerBlock() || mc.getRowsPerBlock() % mcOut.getRowsPerBlock() == 0)
            && (mc.getCols() < mcOut.getColsPerBlock() || mc.getColsPerBlock() % mcOut.getColsPerBlock() == 0);
        out = in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
        if (!shuffleFreeReblock)
            out = RDDAggregateUtils.mergeByKey(out, false);
    } else {
        throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
    }

    // put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.
Class ReorgSPInstruction, method processInstruction.
/**
 * Executes a reorg operation over the binary-block input RDD, selected by opcode:
 * {@code r'} (transpose), {@code rev} (reverse), {@code rdiag} (diag) or
 * {@code rsort} (order). The resulting RDD and its lineage to the input are
 * registered under the output variable after the output characteristics
 * have been updated.
 *
 * @param ec execution context, cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the opcode is none of the above
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    String op = getOpcode();

    // get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> input = sparkCtx.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = null;
    MatrixCharacteristics inChars = sparkCtx.getMatrixCharacteristics(input1.getName());

    if (op.equalsIgnoreCase("r'")) {
        // TRANSPOSE: execute transpose reorg operation
        result = input.mapToPair(new ReorgMapFunction(op));
    } else if (op.equalsIgnoreCase("rev")) {
        // REVERSE: execute reverse reorg operation
        result = input.flatMapToPair(new RDDRevFunction(inChars));
        // merge is only applied if the row count is not a multiple of the row block size
        boolean rowsAligned = (inChars.getRows() % inChars.getRowsPerBlock() == 0);
        if (!rowsAligned) {
            result = RDDAggregateUtils.mergeByKey(result, false);
        }
    } else if (op.equalsIgnoreCase("rdiag")) {
        // DIAG
        if (inChars.getCols() != 1) {
            // diagM2V: filter the diagonal blocks, then apply the reorg map function
            result = input.filter(new FilterDiagBlocksFunction()).mapToPair(new ReorgMapFunction(op));
        } else {
            // diagV2M
            result = input.flatMapToPair(new RDDDiagV2MFunction(inChars));
        }
    } else if (op.equalsIgnoreCase("rsort")) {
        // ORDER: sort by the given column(s) in ascending/descending order
        // and return either index/value

        // get parameters
        long[] sortCols;
        if (_col.getDataType().isMatrix()) {
            sortCols = DataConverter.convertToLongVector(ec.getMatrixInput(_col.getName()));
        } else {
            sortCols = new long[] { ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue() };
        }
        boolean descending = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        boolean returnIndexes = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        boolean ascending = !descending;
        boolean singleColInput = (inChars.getCols() == 1);
        result = input;

        if (sortCols.length > inChars.getColsPerBlock()) {
            LOG.warn("Unsupported sort with number of order-by columns large than blocksize: " + sortCols.length);
        }

        if (singleColInput || sortCols.length == 1) {
            // extract column (if necessary) and sort
            if (!singleColInput) {
                long byCol = sortCols[0];
                int colInBlock = (int) UtilFunctions.computeCellInBlock(byCol, inChars.getColsPerBlock());
                result = result
                    .filter(new IsBlockInRange(1, inChars.getRows(), byCol, byCol, inChars))
                    .mapValues(new ExtractColumn(colInBlock));
            }

            // actual index/data sort operation
            if (returnIndexes) {
                // sort indexes
                result = RDDSortUtils.sortIndexesByVal(result, ascending, inChars.getRows(), inChars.getRowsPerBlock());
            } else if (singleColInput && ascending) {
                // sort single-column matrix
                result = RDDSortUtils.sortByVal(result, inChars.getRows(), inChars.getRowsPerBlock());
            } else if (!_bSortIndInMem) {
                // sort multi-column matrix w/ rewrite
                result = RDDSortUtils.sortDataByVal(result, input, ascending, inChars.getRows(), inChars.getCols(), inChars.getRowsPerBlock(), inChars.getColsPerBlock());
            } else {
                // sort multi-column matrix
                result = RDDSortUtils.sortDataByValMemSort(result, input, ascending, inChars.getRows(), inChars.getCols(), inChars.getRowsPerBlock(), inChars.getColsPerBlock(), sparkCtx, (ReorgOperator) _optr);
            }
        } else {
            // extract columns (if necessary)
            if (sortCols.length < inChars.getCols()) {
                result = result.filter(new IsBlockInList(sortCols, inChars)).mapToPair(new ExtractColumns(sortCols, inChars));
            }
            // append extracted columns (if necessary)
            if (inChars.getCols() > inChars.getColsPerBlock()) {
                result = RDDAggregateUtils.mergeByKey(result);
            }

            // actual index/data sort operation
            if (returnIndexes) {
                // sort indexes
                result = RDDSortUtils.sortIndexesByVals(result, ascending, inChars.getRows(), (long) sortCols.length, inChars.getRowsPerBlock());
            } else if (sortCols.length == inChars.getCols() && ascending) {
                // sort by all columns, ascending
                result = RDDSortUtils.sortByVals(result, inChars.getRows(), sortCols.length, inChars.getRowsPerBlock());
            } else {
                // sort multi-column matrix
                result = RDDSortUtils.sortDataByVals(result, input, ascending, inChars.getRows(), inChars.getCols(), sortCols.length, inChars.getRowsPerBlock(), inChars.getColsPerBlock());
            }
        }
    } else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + op);
    }

    // release the order-by column matrix if one was acquired above
    if (op.equalsIgnoreCase("rsort") && _col.getDataType().isMatrix()) {
        sparkCtx.releaseMatrixInput(_col.getName());
    }

    // store output rdd handle
    updateReorgMatrixCharacteristics(sparkCtx);
    sparkCtx.setRDDHandleForVariable(output.getName(), result);
    sparkCtx.addLineageRDD(output.getName(), input1.getName());
}
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.
Class ReorgSPInstruction, method updateReorgMatrixCharacteristics.
/**
 * Fills in output dimensions and number of non-zeros that are still unknown,
 * derived from the input characteristics and the reorg opcode.
 *
 * <ul>
 *   <li>{@code r'}: rows/cols and block sizes are swapped</li>
 *   <li>{@code rev}: same dimensions and block sizes as the input</li>
 *   <li>{@code rdiag}: rows x 1 if the input has more than one column, otherwise rows x rows</li>
 *   <li>{@code rsort}: rows x 1 if indexes are returned, otherwise rows x cols</li>
 * </ul>
 *
 * @param sec spark execution context holding input and output characteristics
 * @throws DMLRuntimeException if output dimensions are unknown and the input
 *         dimensions are unknown as well
 */
private void updateReorgMatrixCharacteristics(SparkExecutionContext sec) {
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    String opcode = getOpcode();

    // infer initially unknown dimensions from inputs
    if (!mcOut.dimsKnown()) {
        if (!mc1.dimsKnown())
            throw new DMLRuntimeException("Unable to compute output matrix characteristics from input.");

        if (opcode.equalsIgnoreCase("r'")) {
            mcOut.set(mc1.getCols(), mc1.getRows(), mc1.getColsPerBlock(), mc1.getRowsPerBlock());
        } else if (opcode.equalsIgnoreCase("rev")) {
            // reverse only reorders rows, so the output shape equals the input shape
            mcOut.set(mc1.getRows(), mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
        } else if (opcode.equalsIgnoreCase("rdiag")) {
            mcOut.set(mc1.getRows(), (mc1.getCols() > 1) ? 1 : mc1.getRows(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
        } else if (opcode.equalsIgnoreCase("rsort")) {
            boolean ixret = sec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
            mcOut.set(mc1.getRows(), ixret ? 1 : mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
        }
    }

    // infer initially unknown nnz from input
    if (!mcOut.nnzKnown() && mc1.nnzKnown()) {
        boolean sortIx = opcode.equalsIgnoreCase("rsort") && sec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        if (sortIx) {
            // index output: nnz is set to the number of rows
            mcOut.setNonZeros(mc1.getRows());
        } else {
            // default (r', rev, rdiag, rsort data): carry over the input nnz
            mcOut.setNonZeros(mc1.getNonZeros());
        }
    }
}
Aggregations