Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class UaggOuterChainSPInstruction, method parseInstruction.
public static UaggOuterChainSPInstruction parseInstruction(String str) throws DMLRuntimeException {
String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
String opcode = parts[0];
if (opcode.equalsIgnoreCase(UAggOuterChain.OPCODE)) {
AggregateUnaryOperator uaggop = InstructionUtils.parseBasicAggregateUnaryOperator(parts[1]);
BinaryOperator bop = InstructionUtils.parseBinaryOperator(parts[2]);
CPOperand in1 = new CPOperand(parts[3]);
CPOperand in2 = new CPOperand(parts[4]);
CPOperand out = new CPOperand(parts[5]);
//derive aggregation operator from unary operator
String aopcode = InstructionUtils.deriveAggregateOperatorOpcode(parts[1]);
CorrectionLocationType corrLoc = InstructionUtils.deriveAggregateOperatorCorrectionLocation(parts[1]);
String corrExists = (corrLoc != CorrectionLocationType.NONE) ? "true" : "false";
AggregateOperator aop = InstructionUtils.parseAggregateOperator(aopcode, corrExists, corrLoc.toString());
return new UaggOuterChainSPInstruction(bop, uaggop, aop, in1, in2, out, opcode, str);
} else {
throw new DMLRuntimeException("UaggOuterChainSPInstruction.parseInstruction():: Unknown opcode " + opcode);
}
}
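For orientation, here is a hedged caller sketch. The instruction string and its operand names (_mVar1 etc.) are hypothetical; they only illustrate the °-delimited, ·-typed format that the parser above consumes (exec-type ° opcode ° uagg-op ° binary-op ° in1 ° in2 ° out).
//hedged caller sketch (imports assumed: org.apache.sysml.runtime.DMLRuntimeException,
//org.apache.sysml.runtime.instructions.spark.UaggOuterChainSPInstruction)
public static UaggOuterChainSPInstruction parseDemo() throws DMLRuntimeException {
    //hypothetical operand names; uark+ (row sum) and < are illustrative operator choices
    String str = "SPARK°uaggouterchain°uark+°<°_mVar1·MATRIX·DOUBLE°_mVar2·MATRIX·DOUBLE°_mVar3·MATRIX·DOUBLE";
    //throws DMLRuntimeException for any opcode other than uaggouterchain
    return UaggOuterChainSPInstruction.parseInstruction(str);
}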
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class WriteSPInstruction, method parseInstruction.
public static WriteSPInstruction parseInstruction(String str) throws DMLRuntimeException {
String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
String opcode = parts[0];
if (!opcode.equals("write")) {
throw new DMLRuntimeException("Unsupported opcode");
}
// Write instructions for csv files include four additional parameters (hasHeader, delimiter, sparse, isInputMatrixBlock)
if (parts.length != 5 && parts.length != 9) {
throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
}
//example instruction: SPARK°write°_mVar2·MATRIX·DOUBLE°./src/test/scripts/functions/data/out/B·SCALAR·STRING·true°matrixmarket·SCALAR·STRING·true
//first operand: _mVar2·MATRIX·DOUBLE
CPOperand in1 = new CPOperand(parts[1]);
CPOperand in2 = new CPOperand(parts[2]);
CPOperand in3 = new CPOperand(parts[3]);
WriteSPInstruction inst = new WriteSPInstruction(in1, in2, in3, opcode, str);
if (in3.getName().equalsIgnoreCase("csv")) {
boolean hasHeader = Boolean.parseBoolean(parts[4]);
String delim = parts[5];
boolean sparse = Boolean.parseBoolean(parts[6]);
FileFormatProperties formatProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
inst.setFormatProperties(formatProperties);
boolean isInputMB = Boolean.parseBoolean(parts[7]);
inst.setInputMatrixBlock(isInputMB);
CPOperand in4 = new CPOperand(parts[8]);
inst.input4 = in4;
} else {
FileFormatProperties ffp = new FileFormatProperties();
CPOperand in4 = new CPOperand(parts[4]);
inst.input4 = in4;
inst.setFormatProperties(ffp);
}
return inst;
}
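As a hedged caller sketch of the csv branch: the operand values below are hypothetical; only the part count (nine °-delimited parts for csv, five otherwise) and the ordering mirror the parser above.
//hedged caller sketch (imports assumed: org.apache.sysml.runtime.DMLRuntimeException,
//org.apache.sysml.runtime.instructions.spark.WriteSPInstruction)
public static WriteSPInstruction parseCsvWriteDemo() throws DMLRuntimeException {
    //write ° input ° filename ° "csv" ° hasHeader ° delimiter ° sparse ° isInputMatrixBlock ° fourth operand
    String str = "SPARK°write°_mVar2·MATRIX·DOUBLE°./out/B·SCALAR·STRING·true°csv·SCALAR·STRING·true"
        + "°false°,°true°true°_Var3·SCALAR·STRING·true";
    //the parser fills CSVFileFormatProperties(hasHeader=false, delim=",", sparse=true)
    return WriteSPInstruction.parseInstruction(str);
}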
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class LibMatrixReorg, method sort.
public static MatrixBlock sort(MatrixBlock in, MatrixBlock out, int by, boolean desc, boolean ixret) throws DMLRuntimeException {
//meta data gathering and preparation
boolean sparse = in.isInSparseFormat();
int rlen = in.rlen;
int clen = in.clen;
out.sparse = (in.sparse && !ixret);
out.nonZeros = ixret ? rlen : in.nonZeros;
//step 1: error handling
if (by <= 0 || clen < by)
throw new DMLRuntimeException("Sort configuration issue: non-existing orderby column: " + by + " (" + rlen + "x" + clen + " input).");
//step 2: empty block / special case handling
if (!ixret) { //SORT DATA
if (in.isEmptyBlock(false)) //EMPTY INPUT BLOCK
return out;
if (!sparse && clen == 1) {
//DENSE COLUMN VECTOR
//in-place quicksort, unstable (no indexes needed)
out.copy(in); //dense
Arrays.sort(out.denseBlock);
if (desc)
sortReverseDense(out);
return out;
}
} else { //SORT INDEX
if (in.isEmptyBlock(false)) {
//EMPTY INPUT BLOCK
out.allocateDenseBlock(false);
//seq(1,n)
for (int i = 0; i < rlen; i++)
out.setValueDenseUnsafe(i, 0, i + 1);
return out;
}
}
//step 3: index vector sorting
//create index vector and extract values
int[] vix = new int[rlen];
double[] values = new double[rlen];
for (int i = 0; i < rlen; i++) {
vix[i] = i;
values[i] = in.quickGetValue(i, by - 1);
}
//sort index vector on extracted data (unstable)
SortUtils.sortByValue(0, rlen, values, vix);
//flip order if descending (note: this happens before we ensure stable outputs, hence we also flip values)
if (desc) {
sortReverseDense(vix);
sortReverseDense(values);
}
//final pass to ensure stable output
for (int i = 0; i < rlen - 1; i++) {
double tmp = values[i];
//determine run of equal values
int len = 0;
while (i + len + 1 < rlen && tmp == values[i + len + 1]) len++;
//unstable sort of run indexes (equal value guaranteed)
if (len > 0) {
Arrays.sort(vix, i, i + len + 1);
//skip processed run
i += len;
}
}
//step 4: create output matrix (guaranteed non-empty, see step 2)
if (!ixret) {
//copy input data in sorted order into result
if (!sparse) { //DENSE
out.allocateDenseBlock(false);
for (int i = 0; i < rlen; i++) {
System.arraycopy(in.denseBlock, vix[i] * clen, out.denseBlock, i * clen, clen);
}
} else { //SPARSE
out.allocateSparseRowsBlock(false);
for (int i = 0; i < rlen; i++)
if (!in.sparseBlock.isEmpty(vix[i])) {
out.sparseBlock.set(i, in.sparseBlock.get(vix[i]), !SHALLOW_COPY_REORG); //row remains unchanged
}
}
} else {
//copy sorted index vector into result
out.allocateDenseBlock(false);
for (int i = 0; i < rlen; i++) out.setValueDenseUnsafe(i, 0, vix[i] + 1);
}
return out;
}
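A minimal usage sketch, assuming a 3x1 dense input (quickSetValue is the standard MatrixBlock accessor; output blocks are pre-sized here since sort allocates but does not resize them, and the DMLRuntimeException is assumed handled by the caller):
MatrixBlock in = new MatrixBlock(3, 1, false);
in.quickSetValue(0, 0, 5);
in.quickSetValue(1, 0, 2);
in.quickSetValue(2, 0, 7);
//sort values ascending on column 1
MatrixBlock vals = LibMatrixReorg.sort(in, new MatrixBlock(3, 1, false), 1, false, false); //[2, 5, 7]
//return the 1-based, stable sort permutation instead of the values
MatrixBlock ix = LibMatrixReorg.sort(in, new MatrixBlock(3, 1, false), 1, false, true); //[2, 1, 3]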
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class LibMatrixReorg, method rexpandRows.
private static MatrixBlock rexpandRows(MatrixBlock in, MatrixBlock ret, int max, boolean cast, boolean ignore) throws DMLRuntimeException {
//set meta data
final int rlen = max;
final int clen = in.rlen;
final long nnz = in.nonZeros;
boolean sp = MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz);
ret.reset(rlen, clen, sp);
//setup temporary array for 'buffered append w/ sorting' in order
//to mitigate performance issues due to random row access for large m
final int blksize = 1024 * 1024; //max 12MB
int[] tmpi = new int[Math.min(blksize, clen)];
double[] tmp = new double[Math.min(blksize, clen)];
//blockwise expansion of the input vector (generic implementation for the general case)
for (int i = 0; i < clen; i += blksize) {
//create sorted block indexes (append buffer)
int len = Math.min(blksize, clen - i);
copyColVector(in, i, tmp, tmpi, len);
SortUtils.sortByValue(0, len, tmp, tmpi);
//process current append buffer
for (int j = 0; j < len; j++) {
//get value and cast if necessary (table)
double val = tmp[j];
if (cast)
val = UtilFunctions.toLong(val);
//handle invalid values if not to be ignored
if (!ignore && val <= 0)
throw new DMLRuntimeException("Invalid input value <= 0 for ignore=false: " + val);
//set expanded value if matching
if (val == Math.floor(val) && val >= 1 && val <= max)
ret.appendValue((int) (val - 1), i + tmpi[j], 1);
}
}
//sort sparse rows to restore column order (necessary due to cache-conscious processing w/ unstable sort)
if (ret.isInSparseFormat())
ret.sortSparseRows();
return ret;
}
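Since rexpandRows is private, a standalone sketch of its semantics on plain arrays may help (this is not the SystemML API; the cast is approximated with Math.floor, whereas the real code uses UtilFunctions.toLong):
//out[v[j]-1][j] = 1 for every valid entry of the input column vector v
static double[][] rexpandRowsNaive(double[] v, int max, boolean cast, boolean ignore) {
    double[][] out = new double[max][v.length];
    for (int j = 0; j < v.length; j++) {
        double val = cast ? Math.floor(v[j]) : v[j];
        if (!ignore && val <= 0)
            throw new RuntimeException("Invalid input value <= 0 for ignore=false: " + val);
        if (val == Math.floor(val) && val >= 1 && val <= max)
            out[(int) (val - 1)][j] = 1;
    }
    return out;
}
//e.g., rexpandRowsNaive(new double[]{2, 1, 3}, 3, false, true)
//yields the 3x3 indicator matrix {{0,1,0},{1,0,0},{0,0,1}}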
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class DataTransform, method mrDataTransform.
/**
 * Main method to create and/or apply transformation metadata using MapReduce.
*
* @param jobinst MR job instruction
* @param inputs array of input matrices
* @param shuffleInst shuffle instructions
* @param otherInst other instructions
* @param resultIndices byte array of result indices
* @param outputs array of output matrices
* @param numReducers number of reducers
 * @param replication file replication factor
* @return MR job result
 * @throws Exception if an IOException occurs
*/
public static JobReturn mrDataTransform(MRJobInstruction jobinst, MatrixObject[] inputs, String shuffleInst, String otherInst, byte[] resultIndices, MatrixObject[] outputs, int numReducers, int replication) throws Exception {
String[] insts = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
// Parse transform instruction (the first instruction) to obtain relevant fields
TransformOperands oprnds = new TransformOperands(insts[0], inputs[0]);
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
// find the first file in alphabetical ordering of part files in directory inputPath
String smallestFile = CSVReblockMR.findSmallestFile(job, oprnds.inputPath);
// find column names
FileSystem fs = IOUtilFunctions.getFileSystem(smallestFile);
String headerLine = readHeaderLine(fs, oprnds.inputCSVProperties, smallestFile);
HashMap<String, Integer> colNamesToIds = processColumnNames(fs, oprnds.inputCSVProperties, headerLine, smallestFile);
String outHeader = getOutputHeader(fs, headerLine, oprnds);
int numColumns = colNamesToIds.size();
int numColumnsTf = 0;
long numRowsTf = 0;
ArrayList<Integer> csvoutputs = new ArrayList<Integer>();
ArrayList<Integer> bboutputs = new ArrayList<Integer>();
// divide output objects based on output format (CSV or BinaryBlock)
for (int i = 0; i < outputs.length; i++) {
if (outputs[i].getFileFormatProperties() != null && outputs[i].getFileFormatProperties().getFileFormat() == FileFormatProperties.FileFormat.CSV)
csvoutputs.add(i);
else
bboutputs.add(i);
}
boolean isCSV = (csvoutputs.size() > 0);
boolean isBB = (bboutputs.size() > 0);
String tmpPath = MRJobConfiguration.constructTempOutputFilename();
checkIfOutputOverlapsWithTxMtd(outputs, oprnds, isCSV, isBB, csvoutputs, bboutputs, fs);
JobReturn retCSV = null, retBB = null;
if (!oprnds.isApply) {
// build specification file with column IDs instead of column names
String specWithIDs = processSpecFile(fs, oprnds.inputPath, smallestFile, colNamesToIds, oprnds.inputCSVProperties, oprnds.spec);
// enable GC on colNamesToIds
colNamesToIds = null;
// Build transformation metadata, including recode maps, bin definitions, etc.
// Also, generate part offsets file (counters file), which is to be used in csv-reblock
String partOffsetsFile = MRJobConfiguration.constructTempOutputFilename();
numRowsTf = GenTfMtdMR.runJob(oprnds.inputPath, oprnds.txMtdPath, specWithIDs, smallestFile, partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
if (numRowsTf == 0)
throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
// store the specFileWithIDs as transformation metadata
MapReduceTool.writeStringToHDFS(specWithIDs, oprnds.txMtdPath + "/" + "spec.json");
numColumnsTf = getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
// Apply transformation metadata, and perform actual transformation
if (isCSV)
retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
if (isBB) {
DMLConfig conf = ConfigurationManager.getDMLConfig();
int blockSize = conf.getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
CSVReblockInstruction rblk = prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { blockSize }, new int[] { blockSize }, rblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
if (ret1.rlens[0] == 0)
throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numRowsTf, numColumns, numColumnsTf, replication, outHeader);
}
MapReduceTool.deleteFileIfExistOnHDFS(new Path(partOffsetsFile), job);
} else {
// enable GC on colNamesToIds
colNamesToIds = null;
// copy given transform metadata (applyTxPath) to specified location (txMtdPath)
MapReduceTool.deleteFileIfExistOnHDFS(new Path(oprnds.txMtdPath), job);
MapReduceTool.copyFileOnHDFS(oprnds.applyTxPath, oprnds.txMtdPath);
// path to specification file
String specWithIDs = (oprnds.spec != null) ? oprnds.spec : MapReduceTool.readStringFromHDFSFile(oprnds.txMtdPath + "/" + "spec.json");
numColumnsTf = getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
if (isCSV) {
DMLConfig conf = ConfigurationManager.getDMLConfig();
int blockSize = conf.getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
CSVReblockInstruction rblk = prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { blockSize }, new int[] { blockSize }, rblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
numRowsTf = ret1.rlens[0];
if (ret1.rlens[0] == 0)
throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
// Apply transformation metadata, and perform actual transformation
retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.applyTxPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numColumns, replication, outHeader);
}
if (isBB) {
// compute part offsets file
CSVReblockInstruction rblk = (CSVReblockInstruction) InstructionParser.parseSingleInstruction(insts[1]);
CSVReblockInstruction newrblk = (CSVReblockInstruction) rblk.clone((byte) 0);
AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { newrblk.brlen }, new int[] { newrblk.bclen }, newrblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
numRowsTf = ret1.rlens[0];
if (ret1.rlens[0] == 0)
throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
// apply transformation metadata, as well as reblock the resulting data
retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, ret1.rlens[0], ret1.clens[0], numColumnsTf, replication, outHeader);
}
}
// copy auxiliary data (old and new header lines) from temporary location to txMtdPath
moveFilesFromTmp(fs, tmpPath, oprnds.txMtdPath);
// generate matrix metadata file for outputs
if (retCSV != null) {
retCSV.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
CSVFileFormatProperties prop = new CSVFileFormatProperties(false, oprnds.inputCSVProperties.getDelim(), false, Double.NaN, null); // no output header; use the same delimiter as the input
MapReduceTool.writeMetaDataFile(outputs[csvoutputs.get(0)].getFileName() + ".mtd", ValueType.DOUBLE, retCSV.getMatrixCharacteristics(0), OutputInfo.CSVOutputInfo, prop);
return retCSV;
}
if (retBB != null) {
retBB.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
MapReduceTool.writeMetaDataFile(outputs[bboutputs.get(0)].getFileName() + ".mtd", ValueType.DOUBLE, retBB.getMatrixCharacteristics(0), OutputInfo.BinaryBlockOutputInfo);
return retBB;
}
return null;
}
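A hedged caller sketch follows; the enclosing method is assumed to declare throws Exception (matching mrDataTransform's signature), and the variable names are the method's own parameters.
JobReturn ret;
try {
    ret = DataTransform.mrDataTransform(jobinst, inputs, shuffleInst,
        otherInst, resultIndices, outputs, numReducers, replication);
}
catch (DMLRuntimeException e) {
    //e.g., ERROR_MSG_ZERO_ROWS when the transform produces zero rows
    throw e;
}
//ret is null only if neither a CSV nor a BinaryBlock output was requested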