use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class TransformEncodeDecodeTest method runTransformEncodeDecodeTest.
private void runTransformEncodeDecodeTest(ExecType et, boolean sparse, String fmt) {
RUNTIME_PLATFORM platformOld = rtplatform;
// only CP supported
rtplatform = RUNTIME_PLATFORM.HYBRID;
try {
getAndLoadTestConfiguration(TEST_NAME1);
// get input/output info
InputInfo iinfo = InputInfo.stringExternalToInputInfo(fmt);
OutputInfo oinfo = InputInfo.getMatchingOutputInfo(iinfo);
// generate and write input data
double[][] A = TestUtils.round(getRandomMatrix(rows, cols, 1, 15, sparse ? sparsity2 : sparsity1, 7));
FrameBlock FA = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(A));
FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
writer.writeFrameToHDFS(FA, input("F"), rows, cols);
fullDMLScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME1 + ".dml";
programArgs = new String[] { "-explain", "-args", input("F"), fmt, String.valueOf(rows), String.valueOf(cols), SCRIPT_DIR + TEST_DIR + SPEC, output("FO") };
// run test
runTest(true, false, null, -1);
// compare matrices (values recoded to identical codes)
FrameReader reader = FrameReaderFactory.createFrameReader(iinfo);
FrameBlock FO = reader.readFrameFromHDFS(output("FO"), 16, 2);
HashMap<String, Long> cFA = getCounts(FA, 1);
Iterator<String[]> iterFO = FO.getStringRowIterator();
while (iterFO.hasNext()) {
String[] row = iterFO.next();
Double expected = (double) cFA.get(row[1]);
Double val = (row[0] != null) ? Double.valueOf(row[0]) : 0;
Assert.assertEquals("Output aggregates don't match: " + expected + " vs " + val, expected, val);
}
} catch (Exception ex) {
ex.printStackTrace();
Assert.fail(ex.getMessage());
} finally {
rtplatform = platformOld;
}
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class Dag method generateMapReduceInstructions.
/**
* Method to generate MapReduce job instructions from a given set of nodes.
*
* @param execNodes list of exec nodes
* @param inst list of instructions
* @param writeinst list of write instructions
* @param deleteinst list of delete instructions
* @param rmvarinst list of rmvar instructions
* @param jt job type
*/
private void generateMapReduceInstructions(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeinst, ArrayList<Instruction> deleteinst, ArrayList<Instruction> rmvarinst, JobType jt) {
ArrayList<Byte> resultIndices = new ArrayList<>();
ArrayList<String> inputs = new ArrayList<>();
ArrayList<String> outputs = new ArrayList<>();
ArrayList<InputInfo> inputInfos = new ArrayList<>();
ArrayList<OutputInfo> outputInfos = new ArrayList<>();
ArrayList<Long> numRows = new ArrayList<>();
ArrayList<Long> numCols = new ArrayList<>();
ArrayList<Long> numRowsPerBlock = new ArrayList<>();
ArrayList<Long> numColsPerBlock = new ArrayList<>();
ArrayList<String> mapperInstructions = new ArrayList<>();
ArrayList<String> randInstructions = new ArrayList<>();
ArrayList<String> recordReaderInstructions = new ArrayList<>();
int numReducers = 0;
int replication = 1;
ArrayList<String> inputLabels = new ArrayList<>();
ArrayList<String> outputLabels = new ArrayList<>();
ArrayList<Instruction> renameInstructions = new ArrayList<>();
ArrayList<Instruction> variableInstructions = new ArrayList<>();
ArrayList<Instruction> postInstructions = new ArrayList<>();
ArrayList<Integer> MRJobLineNumbers = null;
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers = new ArrayList<>();
}
ArrayList<Lop> inputLops = new ArrayList<>();
boolean cellModeOverride = false;
/* Find the nodes that produce an output */
ArrayList<Lop> rootNodes = new ArrayList<>();
getOutputNodes(execNodes, rootNodes, jt);
if (LOG.isTraceEnabled())
LOG.trace("# of root nodes = " + rootNodes.size());
/* Remove transient writes that are simple copy of transient reads */
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
ArrayList<Lop> markedNodes = new ArrayList<>();
// only keep data nodes that are results of some computation.
for (Lop rnode : rootNodes) {
if (rnode.getExecLocation() == ExecLocation.Data && ((Data) rnode).isTransient() && ((Data) rnode).getOperationType() == OperationTypes.WRITE && ((Data) rnode).getDataType() == DataType.MATRIX) {
// no computation, just a copy
if (rnode.getInputs().get(0).getExecLocation() == ExecLocation.Data && ((Data) rnode.getInputs().get(0)).isTransient() && rnode.getOutputParameters().getLabel().equals(rnode.getInputs().get(0).getOutputParameters().getLabel())) {
markedNodes.add(rnode);
}
}
}
// delete marked nodes
rootNodes.removeAll(markedNodes);
markedNodes.clear();
if (rootNodes.isEmpty())
return;
}
// structure that maps node to their indices that will be used in the instructions
HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
for (Lop rnode : rootNodes) {
getInputPathsAndParameters(rnode, execNodes, inputs, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
}
// In case of RAND job, instructions are defined in the input file
if (jt == JobType.DATAGEN)
randInstructions = inputs;
int[] start_index = new int[1];
start_index[0] = inputs.size();
// currently, recordreader instructions are allowed only in GMR jobs
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
for (Lop rnode : rootNodes) {
getRecordReaderInstructions(rnode, execNodes, inputs, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (recordReaderInstructions.size() > 1)
throw new LopsException("MapReduce job can only have a single recordreader instruction: " + recordReaderInstructions.toString());
}
}
//
if (jt != JobType.REBLOCK && jt != JobType.CSV_REBLOCK && jt != JobType.DATAGEN) {
for (int i = 0; i < inputInfos.size(); i++) if (inputInfos.get(i) == InputInfo.BinaryCellInputInfo || inputInfos.get(i) == InputInfo.TextCellInputInfo)
cellModeOverride = true;
}
if (!recordReaderInstructions.isEmpty() || jt == JobType.GROUPED_AGG)
cellModeOverride = true;
for (int i = 0; i < rootNodes.size(); i++) {
getMapperInstructions(rootNodes.get(i), execNodes, inputs, mapperInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Input strings: " + inputs.toString());
if (jt == JobType.DATAGEN)
LOG.trace(" Rand instructions: " + getCSVString(randInstructions));
if (jt == JobType.GMR)
LOG.trace(" RecordReader instructions: " + getCSVString(recordReaderInstructions));
LOG.trace(" Mapper instructions: " + getCSVString(mapperInstructions));
}
/* Get Shuffle and Reducer Instructions */
ArrayList<String> shuffleInstructions = new ArrayList<>();
ArrayList<String> aggInstructionsReducer = new ArrayList<>();
ArrayList<String> otherInstructionsReducer = new ArrayList<>();
for (Lop rn : rootNodes) {
int resultIndex = getAggAndOtherInstructions(rn, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (resultIndex == -1)
throw new LopsException("Unexpected error in piggybacking!");
if (rn.getExecLocation() == ExecLocation.Data && ((Data) rn).getOperationType() == Data.OperationTypes.WRITE && ((Data) rn).isTransient() && rootNodes.contains(rn.getInputs().get(0))) {
// Both rn (a transient write) and its input are root nodes.
// Instead of creating two copies of the data, simply generate a cpvar instruction
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, true);
writeinst.addAll(out.getLastInstructions());
} else {
resultIndices.add(Byte.valueOf((byte) resultIndex));
// setup output filenames and outputInfos and generate related instructions
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, false);
outputLabels.add(out.getVarName());
outputs.add(out.getFileName());
outputInfos.add(out.getOutInfo());
if (LOG.isTraceEnabled()) {
LOG.trace(" Output Info: " + out.getFileName() + ";" + OutputInfo.outputInfoToString(out.getOutInfo()) + ";" + out.getVarName());
}
renameInstructions.addAll(out.getLastInstructions());
variableInstructions.addAll(out.getPreInstructions());
postInstructions.addAll(out.getPostInstructions());
}
}
/* Determine if the output dimensions are known */
byte[] resultIndicesByte = new byte[resultIndices.size()];
for (int i = 0; i < resultIndicesByte.length; i++) {
resultIndicesByte[i] = resultIndices.get(i).byteValue();
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Shuffle Instructions: " + getCSVString(shuffleInstructions));
LOG.trace(" Aggregate Instructions: " + getCSVString(aggInstructionsReducer));
LOG.trace(" Other instructions =" + getCSVString(otherInstructionsReducer));
LOG.trace(" Output strings: " + outputs.toString());
LOG.trace(" ResultIndices = " + resultIndices.toString());
}
/* Prepare the MapReduce job instruction */
MRJobInstruction mr = new MRJobInstruction(jt);
// check if this is a map-only job. If not, set the number of reducers
if (!shuffleInstructions.isEmpty() || !aggInstructionsReducer.isEmpty() || !otherInstructionsReducer.isEmpty())
numReducers = total_reducers;
// set inputs, outputs, and other other properties for the job
mr.setInputOutputLabels(inputLabels.toArray(new String[0]), outputLabels.toArray(new String[0]));
mr.setOutputs(resultIndicesByte);
mr.setDimsUnknownFilePrefix(getFilePath());
mr.setNumberOfReducers(numReducers);
mr.setReplication(replication);
// set instructions for recordReader and mapper
mr.setRecordReaderInstructions(getCSVString(recordReaderInstructions));
mr.setMapperInstructions(getCSVString(mapperInstructions));
// compute and set mapper memory requirements (for consistency of runtime piggybacking)
if (jt == JobType.GMR) {
double mem = 0;
for (Lop n : execNodes) mem += computeFootprintInMapper(n);
mr.setMemoryRequirements(mem);
}
if (jt == JobType.DATAGEN)
mr.setRandInstructions(getCSVString(randInstructions));
// set shuffle instructions
mr.setShuffleInstructions(getCSVString(shuffleInstructions));
// set reducer instruction
mr.setAggregateInstructionsInReducer(getCSVString(aggInstructionsReducer));
mr.setOtherInstructionsInReducer(getCSVString(otherInstructionsReducer));
if (DMLScript.ENABLE_DEBUG_MODE) {
// set line number information for each MR instruction
mr.setMRJobInstructionsLineNumbers(MRJobLineNumbers);
}
/* Add the prepared instructions to output set */
inst.addAll(variableInstructions);
inst.add(mr);
inst.addAll(postInstructions);
deleteinst.addAll(renameInstructions);
for (Lop l : inputLops) {
if (DMLScript.ENABLE_DEBUG_MODE) {
processConsumers(l, rmvarinst, deleteinst, l);
} else {
processConsumers(l, rmvarinst, deleteinst, null);
}
}
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class Dag method getOutputInfo.
/**
* Method that determines the output format for a given node.
*
* @param node low-level operator
* @param cellModeOverride override mode
* @return output info
*/
private static OutputInfo getOutputInfo(Lop node, boolean cellModeOverride) {
if ((node.getDataType() == DataType.SCALAR && node.getExecType() == ExecType.CP) || node instanceof FunctionCallCP)
return null;
OutputInfo oinfo = null;
OutputParameters oparams = node.getOutputParameters();
if (oparams.isBlocked()) {
if (!cellModeOverride)
oinfo = OutputInfo.BinaryBlockOutputInfo;
else {
// output format is overridden, for example, due to recordReaderInstructions in the job
oinfo = OutputInfo.BinaryCellOutputInfo;
// which stores the outputInfo.
try {
oparams.setDimensions(oparams.getNumRows(), oparams.getNumCols(), -1, -1, oparams.getNnz(), oparams.getUpdateType());
} catch (HopsException e) {
throw new LopsException(node.printErrorLocation() + "error in getOutputInfo in Dag ", e);
}
}
} else {
if (oparams.getFormat() == Format.TEXT || oparams.getFormat() == Format.MM)
oinfo = OutputInfo.TextCellOutputInfo;
else if (oparams.getFormat() == Format.CSV) {
oinfo = OutputInfo.CSVOutputInfo;
} else {
oinfo = OutputInfo.BinaryCellOutputInfo;
}
}
/* Instead of following hardcoding, one must get this information from Lops */
if (node.getType() == Type.SortKeys && node.getExecType() == ExecType.MR) {
if (((SortKeys) node).getOpType() == SortKeys.OperationTypes.Indexes)
oinfo = OutputInfo.BinaryBlockOutputInfo;
else
oinfo = OutputInfo.OutputInfoForSortOutput;
} else if (node.getType() == Type.CombineBinary) {
// Output format of CombineBinary (CB) depends on how the output is consumed
CombineBinary combine = (CombineBinary) node;
if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
oinfo = OutputInfo.OutputInfoForSortInput;
} else if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
oinfo = OutputInfo.WeightedPairOutputInfo;
}
} else if (node.getType() == Type.CombineTernary) {
oinfo = OutputInfo.WeightedPairOutputInfo;
} else if (node.getType() == Type.CentralMoment || node.getType() == Type.CoVariance) {
// CMMR always operate in "cell mode",
// and the output is always in cell format
oinfo = OutputInfo.BinaryCellOutputInfo;
}
return oinfo;
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class RunMRJobs method submitJob.
/**
* Submits an MR job instruction, without modifying any state of that instruction.
*
* @param inst instruction
* @return job status
*/
public static JobReturn submitJob(MRJobInstruction inst) {
JobReturn ret = new JobReturn();
MatrixObject[] inputMatrices = inst.getInputMatrices();
MatrixObject[] outputMatrices = inst.getOutputMatrices();
boolean execCP = false;
// Spawn MapReduce Jobs
try {
// replace all placeholders in all instructions with appropriate values
String rdInst = inst.getIv_randInstructions();
String rrInst = inst.getIv_recordReaderInstructions();
String mapInst = inst.getIv_instructionsInMapper();
String shuffleInst = inst.getIv_shuffleInstructions();
String aggInst = inst.getIv_aggInstructions();
String otherInst = inst.getIv_otherInstructions();
boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
switch(inst.getJobType()) {
case GMR:
case GMRCELL:
ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), inst.getPartitioned(), inst.getPformats(), inst.getPsizes(), rrInst, mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
break;
case DATAGEN:
if (ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPDataGen(inst, rdInst)) {
ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
Statistics.decrementNoOfExecutedMRJobs();
execCP = true;
} else {
ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR), mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
}
break;
case CM_COV:
ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
break;
case GROUPED_AGG:
ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
break;
case REBLOCK:
case CSV_REBLOCK:
if (ConfigurationManager.isDynamicRecompilation() && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPReblock(inst, inputMatrices)) {
ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
Statistics.decrementNoOfExecutedMRJobs();
execCP = true;
} else {
// export dirty matrices to HDFS (initially deferred)
for (MatrixObject m : inputMatrices) {
if (m.isDirty())
m.exportData();
}
checkEmptyInputs(inst, inputMatrices);
if (inst.getJobType() == JobType.REBLOCK) {
ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), getNNZ(inputMatrices), mapInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
} else if (inst.getJobType() == JobType.CSV_REBLOCK) {
ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
}
}
break;
case CSV_WRITE:
ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBclens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs());
break;
case MMCJ:
ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0]);
break;
case MMRJ:
ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
break;
case SORT:
boolean weightsflag = true;
if (!mapInst.equalsIgnoreCase(""))
weightsflag = false;
ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0], inst.getRlens()[0], inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0], mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0], weightsflag);
break;
case COMBINE:
ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
break;
case DATA_PARTITION:
ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst, inst.getIv_resultIndices(), outputMatrices, inst.getIv_numReducers(), inst.getIv_replication());
break;
default:
throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
}
}// end of try block
catch (Exception e) {
throw new DMLRuntimeException(e);
}
if (ret.checkReturnStatus()) {
/*
* Check if any output is empty. If yes, create a dummy file. Needs
* to be done only in case of (1) CellOutputInfo and if not CP, or
* (2) BinaryBlockOutputInfo if not CP and output empty blocks disabled.
*/
try {
if (!execCP) {
for (int i = 0; i < outputMatrices.length; i++) {
// get output meta data
MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
MatrixCharacteristics mc = meta.getMatrixCharacteristics();
OutputInfo outinfo = meta.getOutputInfo();
String fname = outputMatrices[i].getFileName();
if (MapReduceTool.isHDFSFileEmpty(fname)) {
// prepare output file
Path filepath = new Path(fname, "0-m-00000");
MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock());
}
outputMatrices[i].setHDFSFileExists(true);
if (inst.getJobType() != JobType.CSV_WRITE) {
// write out metadata file
// Currently, valueType information in not stored in MR instruction,
// since only DOUBLE matrices are supported ==> hard coded the value type information for now
MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
}
}
}
return ret;
} catch (IOException e) {
throw new DMLRuntimeException(e);
}
}
// should not come here!
throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}
use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.
the class JMLCInputStreamReadTest method runJMLCInputStreamReadTest.
private void runJMLCInputStreamReadTest(DataType dt, boolean sparse, String format, boolean metaData) throws IOException {
TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
// generate inputs
OutputInfo oinfo = format.equals("csv") ? OutputInfo.CSVOutputInfo : OutputInfo.TextCellOutputInfo;
double[][] data = TestUtils.round(getRandomMatrix(rows, cols, 0.51, 7.49, sparse ? sparsity2 : sparsity1, 7));
Connection conn = new Connection();
try {
if (dt == DataType.MATRIX) {
// write input matrix
MatrixBlock mb = DataConverter.convertToMatrixBlock(data);
MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(oinfo);
writer.writeMatrixToHDFS(mb, output("X"), rows, cols, -1, -1, -1);
// read matrix from input stream
FileInputStream fis = new FileInputStream(output("X"));
double[][] data2 = conn.convertToDoubleMatrix(fis, rows, cols, format);
fis.close();
// compare matrix result
TestUtils.compareMatrices(data, data2, rows, cols, 0);
} else if (dt == DataType.FRAME) {
// write input frame
String[][] fdata = FrameTransformTest.createFrameData(data, "V");
// test quoted tokens w/ inner quotes
fdata[3][1] = "\"ab\"\"cdef\"";
if (format.equals("csv"))
// test delimiter and space tokens
fdata[7][2] = "\"a,bc def\"";
FrameBlock fb = DataConverter.convertToFrameBlock(fdata);
if (metaData) {
fb.setColumnNames(IntStream.range(0, cols).mapToObj(i -> "CC" + i).collect(Collectors.toList()).toArray(new String[0]));
}
FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
writer.writeFrameToHDFS(fb, output("X"), rows, cols);
// read frame from input stream
FileInputStream fis = new FileInputStream(output("X"));
String[][] fdata2 = conn.convertToStringFrame(fis, rows, cols, format);
fis.close();
// compare frame result
TestUtils.compareFrames(fdata, fdata2, rows, cols);
} else {
throw new IOException("Unsupported data type: " + dt.name());
}
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
MapReduceTool.deleteFileIfExistOnHDFS(output("X"));
IOUtilFunctions.closeSilently(conn);
}
}
Aggregations