Use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.
From the class TransformEncodeDecodeTest, method runTransformEncodeDecodeTest:
private void runTransformEncodeDecodeTest(ExecType et, boolean sparse, String fmt) {
    RUNTIME_PLATFORM platformOld = rtplatform;
    // only CP supported
    rtplatform = RUNTIME_PLATFORM.HYBRID;
    try {
        getAndLoadTestConfiguration(TEST_NAME1);
        // get input/output info
        InputInfo iinfo = InputInfo.stringExternalToInputInfo(fmt);
        OutputInfo oinfo = InputInfo.getMatchingOutputInfo(iinfo);
        // generate and write input data
        double[][] A = TestUtils.round(getRandomMatrix(rows, cols, 1, 15, sparse ? sparsity2 : sparsity1, 7));
        FrameBlock FA = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(A));
        FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
        writer.writeFrameToHDFS(FA, input("F"), rows, cols);
        fullDMLScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", input("F"), fmt,
            String.valueOf(rows), String.valueOf(cols), SCRIPT_DIR + TEST_DIR + SPEC, output("FO") };
        // run test
        runTest(true, false, null, -1);
        // compare matrices (values recoded to identical codes)
        FrameReader reader = FrameReaderFactory.createFrameReader(iinfo);
        FrameBlock FO = reader.readFrameFromHDFS(output("FO"), 16, 2);
        HashMap<String, Long> cFA = getCounts(FA, 1);
        Iterator<String[]> iterFO = FO.getStringRowIterator();
        while (iterFO.hasNext()) {
            String[] row = iterFO.next();
            Double expected = (double) cFA.get(row[1]);
            Double val = (row[0] != null) ? Double.valueOf(row[0]) : 0;
            Assert.assertEquals("Output aggregates don't match: " + expected + " vs " + val, expected, val);
        }
    }
    catch (Exception ex) {
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
    finally {
        rtplatform = platformOld;
    }
}
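Note: the iinfo/oinfo lookup at the top of this test converts an external format string into matching input/output metadata. A minimal standalone sketch of that round trip, assuming a SystemML jar on the classpath and that "text", "csv", and "binary" are among the accepted external format names (treat the exact string set as an assumption):

import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class FormatInfoRoundTrip {
    public static void main(String[] args) {
        for (String fmt : new String[] { "text", "csv", "binary" }) {
            // external format string -> InputInfo (as in the test above)
            InputInfo iinfo = InputInfo.stringExternalToInputInfo(fmt);
            // InputInfo -> the OutputInfo used to write in the same format
            OutputInfo oinfo = InputInfo.getMatchingOutputInfo(iinfo);
            System.out.println(fmt + " -> " + OutputInfo.outputInfoToString(oinfo));
        }
    }
}

This pairing is what lets the test write its input frame and read back the output in whatever format the test parameter requests.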
Use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.
From the class RunMRJobs, method submitJob:
/**
 * Submits an MR job instruction, without modifying any state of that instruction.
 *
 * @param inst instruction
 * @return job status
 */
public static JobReturn submitJob(MRJobInstruction inst) {
    JobReturn ret = new JobReturn();
    MatrixObject[] inputMatrices = inst.getInputMatrices();
    MatrixObject[] outputMatrices = inst.getOutputMatrices();
    boolean execCP = false;
    // Spawn MapReduce Jobs
    try {
        // replace all placeholders in all instructions with appropriate values
        String rdInst = inst.getIv_randInstructions();
        String rrInst = inst.getIv_recordReaderInstructions();
        String mapInst = inst.getIv_instructionsInMapper();
        String shuffleInst = inst.getIv_shuffleInstructions();
        String aggInst = inst.getIv_aggInstructions();
        String otherInst = inst.getIv_otherInstructions();
        boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
        switch (inst.getJobType()) {
            case GMR:
            case GMRCELL:
                ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(),
                    inst.getBrlens(), inst.getBclens(), inst.getPartitioned(), inst.getPformats(), inst.getPsizes(),
                    rrInst, mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse,
                    inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATAGEN:
                if (ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE
                    && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPDataGen(inst, rdInst)) {
                    ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                }
                else {
                    ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR), mapInst, aggInst, otherInst,
                        inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(),
                        inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                }
                break;
            case CM_COV:
                ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(),
                    inst.getBrlens(), inst.getBclens(), mapInst, shuffleInst, inst.getIv_numReducers(),
                    inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case GROUPED_AGG:
                ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(),
                    inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(),
                    inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(),
                    inst.getOutputs(), inst.getOutputInfos());
                break;
            case REBLOCK:
            case CSV_REBLOCK:
                if (ConfigurationManager.isDynamicRecompilation() && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP
                    && Recompiler.checkCPReblock(inst, inputMatrices)) {
                    ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                }
                else {
                    // export dirty matrices to HDFS (initially deferred)
                    for (MatrixObject m : inputMatrices) {
                        if (m.isDirty())
                            m.exportData();
                    }
                    checkEmptyInputs(inst, inputMatrices);
                    if (inst.getJobType() == JobType.REBLOCK) {
                        ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(),
                            inst.getClens(), inst.getBrlens(), inst.getBclens(), getNNZ(inputMatrices), mapInst,
                            shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse,
                            inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                    else if (inst.getJobType() == JobType.CSV_REBLOCK) {
                        ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(),
                            inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst,
                            inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(),
                            inst.getOutputs(), inst.getOutputInfos());
                    }
                }
                break;
            case CSV_WRITE:
                ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(),
                    inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getIv_resultIndices(), inst.getOutputs());
                break;
            case MMCJ:
                ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(),
                    inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, inst.getIv_numReducers(),
                    inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0]);
                break;
            case MMRJ:
                ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(),
                    inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, otherInst,
                    inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(),
                    inst.getOutputs(), inst.getOutputInfos());
                break;
            case SORT:
                boolean weightsflag = true;
                if (!mapInst.equalsIgnoreCase(""))
                    weightsflag = false;
                ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0], inst.getRlens()[0],
                    inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0], mapInst, shuffleInst,
                    inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0],
                    inst.getOutputInfos()[0], weightsflag);
                break;
            case COMBINE:
                ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(),
                    inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(),
                    inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATA_PARTITION:
                ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst, inst.getIv_resultIndices(),
                    outputMatrices, inst.getIv_numReducers(), inst.getIv_replication());
                break;
            default:
                throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
        }
    } // end of try block
    catch (Exception e) {
        throw new DMLRuntimeException(e);
    }
    if (ret.checkReturnStatus()) {
        /*
         * Check if any output is empty. If yes, create a dummy file. Needs
         * to be done only in case of (1) CellOutputInfo and if not CP, or
         * (2) BinaryBlockOutputInfo if not CP and output empty blocks disabled.
         */
        try {
            if (!execCP) {
                for (int i = 0; i < outputMatrices.length; i++) {
                    // get output meta data
                    MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    OutputInfo outinfo = meta.getOutputInfo();
                    String fname = outputMatrices[i].getFileName();
                    if (MapReduceTool.isHDFSFileEmpty(fname)) {
                        // prepare output file
                        Path filepath = new Path(fname, "0-m-00000");
                        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
                        writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(),
                            mc.getRowsPerBlock(), mc.getColsPerBlock());
                    }
                    outputMatrices[i].setHDFSFileExists(true);
                    if (inst.getJobType() != JobType.CSV_WRITE) {
                        // write out metadata file
                        // note: valueType information is not stored in the MR instruction,
                        // since only DOUBLE matrices are supported ==> value type hard-coded for now
                        MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE,
                            ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
                    }
                }
            }
            return ret;
        }
        catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    // should not get here!
    throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}
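Note: the empty-output handling at the end of submitJob is where OutputInfo is used directly, by creating a format-specific writer for a dummy part file so readers of an empty MR output directory do not fail. A minimal sketch of that step in isolation, reusing the same SystemML APIs shown above; the helper name writeDummyOutput and the path layout are hypothetical:

import org.apache.hadoop.fs.Path;
import org.apache.sysml.runtime.io.MatrixWriter;
import org.apache.sysml.runtime.io.MatrixWriterFactory;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class EmptyOutputDemo {
    // hypothetical helper, not part of RunMRJobs: create a "0-m-00000"
    // dummy part file under the output directory for the given format
    public static void writeDummyOutput(String fname, OutputInfo oinfo,
            long rows, long cols, int brlen, int bclen) throws Exception {
        Path filepath = new Path(fname, "0-m-00000");
        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(oinfo);
        writer.writeEmptyMatrixToHDFS(filepath.toString(), rows, cols, brlen, bclen);
    }
}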
Use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.
From the class VariableCPInstruction, method writeMMFile:
/**
 * Helper function to write MatrixMarket (MM) files to HDFS.
 *
 * @param ec execution context
 * @param fname file name
 */
private void writeMMFile(ExecutionContext ec, String fname) {
    MatrixObject mo = ec.getMatrixObject(getInput1().getName());
    String outFmt = "matrixmarket";
    if (mo.isDirty()) {
        // there exists data computed in CP that is not backed up on HDFS,
        // i.e., it is either in-memory or in evicted space
        mo.exportData(fname, outFmt);
    }
    else {
        OutputInfo oi = ((MetaDataFormat) mo.getMetaData()).getOutputInfo();
        MatrixCharacteristics mc = mo.getMatrixCharacteristics();
        if (oi == OutputInfo.TextCellOutputInfo) {
            try {
                WriterMatrixMarket.mergeTextcellToMatrixMarket(mo.getFileName(), fname,
                    mc.getRows(), mc.getCols(), mc.getNonZeros());
            }
            catch (IOException e) {
                throw new DMLRuntimeException(e);
            }
        }
        else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            mo.exportData(fname, outFmt);
        }
        else {
            throw new DMLRuntimeException("Unexpected data format ("
                + OutputInfo.outputInfoToString(oi) + "): can not export into MatrixMarket format.");
        }
    }
}
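Note: the interesting branch is the clean text-cell case, which avoids re-reading the matrix and merges the existing HDFS cell files into a single MatrixMarket file (header plus the same i j v triples). A minimal direct use of that merge, assuming the static WriterMatrixMarket API shown above; paths and dimensions are hypothetical:

import org.apache.sysml.runtime.io.WriterMatrixMarket;

public class TextcellToMMDemo {
    public static void main(String[] args) throws Exception {
        // merge existing text-cell output into one MatrixMarket file;
        // rows/cols/nnz come from the matrix characteristics in practice
        WriterMatrixMarket.mergeTextcellToMatrixMarket(
            "hdfs:/tmp/X.textcell", "hdfs:/tmp/X.mm", 1000, 100, 50000);
    }
}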
Use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.
From the class VariableCPInstruction, method writeCSVFile:
/**
 * Helper function to write CSV files to HDFS.
 *
 * @param ec execution context
 * @param fname file name
 */
private void writeCSVFile(ExecutionContext ec, String fname) {
    MatrixObject mo = ec.getMatrixObject(getInput1().getName());
    String outFmt = "csv";
    if (mo.isDirty()) {
        // there exists data computed in CP that is not backed up on HDFS,
        // i.e., it is either in-memory or in evicted space
        mo.exportData(fname, outFmt, _formatProperties);
    }
    else {
        try {
            OutputInfo oi = ((MetaDataFormat) mo.getMetaData()).getOutputInfo();
            MatrixCharacteristics mc = ((MetaDataFormat) mo.getMetaData()).getMatrixCharacteristics();
            if (oi == OutputInfo.CSVOutputInfo) {
                WriterTextCSV writer = new WriterTextCSV((CSVFileFormatProperties) _formatProperties);
                writer.addHeaderToCSV(mo.getFileName(), fname, mc.getRows(), mc.getCols());
            }
            else if (oi == OutputInfo.BinaryBlockOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
                mo.exportData(fname, outFmt, _formatProperties);
            }
            else {
                throw new DMLRuntimeException("Unexpected data format ("
                    + OutputInfo.outputInfoToString(oi) + "): can not export into CSV format.");
            }
            // write metadata file
            MapReduceTool.writeMetaDataFile(fname + ".mtd", mo.getValueType(), mc,
                OutputInfo.CSVOutputInfo, _formatProperties);
        }
        catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
}
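Note: analogously to the MatrixMarket case, the clean CSV branch avoids rewriting the data and only prepends a header. A minimal sketch of that call; the paths and dimensions are hypothetical, and the (header, delimiter, sparse) constructor of CSVFileFormatProperties is an assumption about the SystemML 1.x API:

import org.apache.sysml.runtime.io.WriterTextCSV;
import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;

public class CsvHeaderDemo {
    public static void main(String[] args) throws Exception {
        // assumed signature: write a header, comma delimiter, dense output
        CSVFileFormatProperties props = new CSVFileFormatProperties(true, ",", false);
        WriterTextCSV writer = new WriterTextCSV(props);
        // prepend the header to the existing CSV data under a new file name
        writer.addHeaderToCSV("hdfs:/tmp/X.csv", "hdfs:/tmp/X_with_header.csv", 1000, 100);
    }
}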
Use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.
From the class FrameObject, method writeBlobFromRDDtoHDFS:
@Override
protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String ofmt)
    throws IOException, DMLRuntimeException
{
    // prepare output info
    MetaDataFormat iimd = (MetaDataFormat) _metaData;
    OutputInfo oinfo = (ofmt != null) ? OutputInfo.stringToOutputInfo(ofmt)
        : InputInfo.getMatchingOutputInfo(iimd.getInputInfo());
    // note: the write of an RDD to HDFS might trigger
    // lazy evaluation of pending transformations.
    SparkExecutionContext.writeFrameRDDtoHDFS(rdd, fname, oinfo);
}
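Note: the format resolution here mirrors the test at the top of this page: an explicit format string goes through OutputInfo.stringToOutputInfo, while a null format falls back to the OutputInfo matching the source's InputInfo. A compact sketch of just that decision; the class and method names are hypothetical, and the accepted format strings (e.g., "csv") are an assumption:

import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class ResolveOutputInfoDemo {
    // mirrors the oinfo resolution in writeBlobFromRDDtoHDFS above
    public static OutputInfo resolve(String ofmt, InputInfo sourceInfo) {
        return (ofmt != null)
            ? OutputInfo.stringToOutputInfo(ofmt)          // explicit target format
            : InputInfo.getMatchingOutputInfo(sourceInfo); // keep the source format
    }

    public static void main(String[] args) {
        System.out.println(OutputInfo.outputInfoToString(
            resolve("csv", InputInfo.BinaryBlockInputInfo)));
        System.out.println(OutputInfo.outputInfoToString(
            resolve(null, InputInfo.BinaryBlockInputInfo)));
    }
}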