use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class ExecutionContext method setMetaData.
public void setMetaData(String varName, long nrows, long ncols) {
    MatrixObject mo = getMatrixObject(varName);
    if (mo.getNumRows() == nrows && mo.getNumColumns() == ncols)
        return;
    MetaData oldMetaData = mo.getMetaData();
    if (oldMetaData == null || !(oldMetaData instanceof MetaDataFormat))
        throw new DMLRuntimeException("Metadata not available");
    MatrixCharacteristics mc = new MatrixCharacteristics(nrows, ncols,
        (int) mo.getNumRowsPerBlock(), (int) mo.getNumColumnsPerBlock());
    mo.setMetaData(new MetaDataFormat(mc,
        ((MetaDataFormat) oldMetaData).getOutputInfo(),
        ((MetaDataFormat) oldMetaData).getInputInfo()));
}
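For context: MetaDataFormat pairs a matrix's dimensions and block sizes (MatrixCharacteristics) with its on-disk input and output formats; setMetaData above replaces only the dimensions while preserving both formats. A minimal construction sketch, not taken from the project (class name and sizes are illustrative):

import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MetaDataFormat;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class MetaDataFormatSketch {
    public static void main(String[] args) {
        // a 10000 x 100 matrix stored as 1000 x 1000 binary blocks (illustrative sizes)
        MatrixCharacteristics mc = new MatrixCharacteristics(10000, 100, 1000, 1000);
        MetaDataFormat meta = new MetaDataFormat(mc,
            OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        System.out.println(meta.getMatrixCharacteristics()); // dims, block sizes, nnz
    }
}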
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class ExternalFunctionInvocationInstruction method createOutputMatrixObject.
private MatrixObject createOutputMatrixObject(Matrix m) {
    MatrixObject ret = m.getMatrixObject();
    if (ret == null) {
        // otherwise, pass in-memory matrix from extfunct back to invoking program
        MatrixCharacteristics mc = new MatrixCharacteristics(m.getNumRows(), m.getNumCols(),
            ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        MetaDataFormat mfmd = new MetaDataFormat(mc, InputInfo.getMatchingOutputInfo(iinfo), iinfo);
        ret = new MatrixObject(Expression.ValueType.DOUBLE, m.getFilePath(), mfmd);
    }
    // allow in-memory package-support matrices without file names
    if (ret.getFileName().equals(Matrix.DEFAULT_FILENAME)) {
        ret.setFileName(createDefaultOutputFilePathAndName());
    }
    return ret;
}
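The InputInfo.getMatchingOutputInfo call above derives the output format corresponding to a given input format, so both sides of the metadata stay consistent. A hedged sketch of that pairing (format choice and class name are illustrative):

import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MetaDataFormat;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class MatchingFormatSketch {
    public static void main(String[] args) {
        InputInfo iinfo = InputInfo.TextCellInputInfo;
        // text-cell input pairs with text-cell output
        OutputInfo oinfo = InputInfo.getMatchingOutputInfo(iinfo);
        MatrixCharacteristics mc = new MatrixCharacteristics(100, 10, 1000, 1000);
        MetaDataFormat meta = new MetaDataFormat(mc, oinfo, iinfo);
        System.out.println(meta.getMatrixCharacteristics());
    }
}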
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class FrameEvictionTest method runFrameEvictionTest.
/**
 * @param schema value types of the frame columns
 * @param sparse whether to generate sparse input data
 * @param defaultMeta whether to keep the default column names/metadata
 * @param force whether to force-evict the buffer pool
 */
private void runFrameEvictionTest(ValueType[] schema, boolean sparse, boolean defaultMeta, boolean force) {
    try {
        // data generation
        double sparsity = sparse ? sparsity2 : sparsity1;
        double[][] A = getRandomMatrix(rows, schema.length, -10, 10, sparsity, 765);
        MatrixBlock mA = DataConverter.convertToMatrixBlock(A);
        FrameBlock fA = DataConverter.convertToFrameBlock(mA, schema);
        // create non-default column names
        if (!defaultMeta) {
            String[] colnames = new String[schema.length];
            for (int i = 0; i < schema.length; i++)
                colnames[i] = "Custom_name_" + i;
            fA.setColumnNames(colnames);
        }
        // setup caching
        CacheableData.initCaching("tmp_frame_eviction_test");
        // create frame object
        MatrixCharacteristics mc = new MatrixCharacteristics(rows, schema.length, -1, -1, -1);
        MetaDataFormat meta = new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        FrameObject fo = new FrameObject("fA", meta, schema);
        fo.acquireModify(fA);
        fo.release();
        // evict frame and clear in-memory reference
        if (force)
            LazyWriteBuffer.forceEviction();
        Method clearfo = CacheableData.class.getDeclaredMethod("clearCache", new Class[] {});
        // bypass access checks on the private method
        clearfo.setAccessible(true);
        clearfo.invoke(fo, new Object[] {});
        // read frame through the buffer pool (if forced, this is a read from disk;
        // otherwise a deserialization or a simple reference, depending on the schema)
        FrameBlock fA2 = fo.acquireRead();
        fo.release();
        // compare frames
        String[][] sA = DataConverter.convertToStringFrame(fA);
        String[][] sA2 = DataConverter.convertToStringFrame(fA2);
        TestUtils.compareFrames(sA, sA2, rows, schema.length);
    }
    catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
}
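Stripped of the reflection and the comparison, the pattern under test is a buffer-pool round trip of a FrameObject built with binary-block MetaDataFormat. A condensed sketch under the same assumptions (method and cache-directory names are illustrative):

import java.io.IOException;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MetaDataFormat;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class FrameRoundTripSketch {
    // round-trips a frame through the buffer pool and returns the restored block
    public static FrameBlock roundTrip(FrameBlock fb, ValueType[] schema, int rows) throws IOException {
        CacheableData.initCaching("tmp_frame_roundtrip"); // once per process
        MatrixCharacteristics mc = new MatrixCharacteristics(rows, schema.length, -1, -1, -1);
        MetaDataFormat meta = new MetaDataFormat(mc,
            OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        FrameObject fo = new FrameObject("fTmp", meta, schema);
        fo.acquireModify(fb); // pin and hand over the in-memory frame
        fo.release();         // unpin: the frame becomes eligible for eviction
        FrameBlock fb2 = fo.acquireRead(); // restored from the buffer pool, or disk if evicted
        fo.release();
        return fb2;
    }
}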
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class RunMRJobs method submitJob.
/**
* Submits an MR job instruction, without modifying any state of that instruction.
*
* @param inst instruction
* @return job status
*/
public static JobReturn submitJob(MRJobInstruction inst) {
    JobReturn ret = new JobReturn();
    MatrixObject[] inputMatrices = inst.getInputMatrices();
    MatrixObject[] outputMatrices = inst.getOutputMatrices();
    boolean execCP = false;
    // spawn MapReduce jobs
    try {
        // replace all placeholders in all instructions with appropriate values
        String rdInst = inst.getIv_randInstructions();
        String rrInst = inst.getIv_recordReaderInstructions();
        String mapInst = inst.getIv_instructionsInMapper();
        String shuffleInst = inst.getIv_shuffleInstructions();
        String aggInst = inst.getIv_aggInstructions();
        String otherInst = inst.getIv_otherInstructions();
        boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
        switch (inst.getJobType()) {
            case GMR:
            case GMRCELL:
                ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                    inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                    inst.getPartitioned(), inst.getPformats(), inst.getPsizes(),
                    rrInst, mapInst, aggInst, otherInst,
                    inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse,
                    inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(),
                    inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATAGEN:
                if (ConfigurationManager.isDynamicRecompilation()
                        && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE
                        && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP
                        && Recompiler.checkCPDataGen(inst, rdInst)) {
                    ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                }
                else {
                    ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR),
                        mapInst, aggInst, otherInst,
                        inst.getIv_numReducers(), inst.getIv_replication(),
                        inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(),
                        inst.getOutputs(), inst.getOutputInfos());
                }
                break;
            case CM_COV:
                ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                    inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                    mapInst, shuffleInst,
                    inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case GROUPED_AGG:
                ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                    inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                    shuffleInst, otherInst,
                    inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(),
                    inst.getOutputs(), inst.getOutputInfos());
                break;
            case REBLOCK:
            case CSV_REBLOCK:
                if (ConfigurationManager.isDynamicRecompilation()
                        && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP
                        && Recompiler.checkCPReblock(inst, inputMatrices)) {
                    ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                }
                else {
                    // export dirty matrices to HDFS (initially deferred)
                    for (MatrixObject m : inputMatrices) {
                        if (m.isDirty())
                            m.exportData();
                    }
                    checkEmptyInputs(inst, inputMatrices);
                    if (inst.getJobType() == JobType.REBLOCK) {
                        ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                            inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                            getNNZ(inputMatrices), mapInst, shuffleInst, otherInst,
                            inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse,
                            inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                    else if (inst.getJobType() == JobType.CSV_REBLOCK) {
                        ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                            inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                            shuffleInst, otherInst,
                            inst.getIv_numReducers(), inst.getIv_replication(),
                            inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                }
                break;
            case CSV_WRITE:
                ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                    inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                    shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getIv_resultIndices(), inst.getOutputs());
                break;
            case MMCJ:
                ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                    inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                    mapInst, aggInst, shuffleInst,
                    inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getOutputs()[0], inst.getOutputInfos()[0]);
                break;
            case MMRJ:
                ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                    inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                    mapInst, aggInst, shuffleInst, otherInst,
                    inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case SORT:
                boolean weightsflag = true;
                if (!mapInst.equalsIgnoreCase(""))
                    weightsflag = false;
                ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0],
                    inst.getRlens()[0], inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0],
                    mapInst, shuffleInst,
                    inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getOutputs()[0], inst.getOutputInfos()[0], weightsflag);
                break;
            case COMBINE:
                ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(),
                    inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(),
                    shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(),
                    inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATA_PARTITION:
                ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst,
                    inst.getIv_resultIndices(), outputMatrices,
                    inst.getIv_numReducers(), inst.getIv_replication());
                break;
            default:
                throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
        }
    } // end of try block
    catch (Exception e) {
        throw new DMLRuntimeException(e);
    }
    if (ret.checkReturnStatus()) {
        /*
         * Check if any output is empty. If yes, create a dummy file. This is
         * needed only for (1) cell output formats when not executed in CP, or
         * (2) binary-block output when not in CP and empty-block output is disabled.
         */
        try {
            if (!execCP) {
                for (int i = 0; i < outputMatrices.length; i++) {
                    // get output meta data
                    MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    OutputInfo outinfo = meta.getOutputInfo();
                    String fname = outputMatrices[i].getFileName();
                    if (MapReduceTool.isHDFSFileEmpty(fname)) {
                        // prepare output file
                        Path filepath = new Path(fname, "0-m-00000");
                        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
                        writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(),
                            mc.getRowsPerBlock(), mc.getColsPerBlock());
                    }
                    outputMatrices[i].setHDFSFileExists(true);
                    if (inst.getJobType() != JobType.CSV_WRITE) {
                        // write out the metadata file; value type information is not stored
                        // in the MR instruction (only DOUBLE matrices are supported), so the
                        // value type is hard-coded for now
                        MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE,
                            ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
                    }
                }
            }
            return ret;
        }
        catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    // should never get here
    throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}
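The post-processing above shows the canonical read path for MetaDataFormat: cast the MetaData attached to a MatrixObject, then pull out the characteristics and the output format. A minimal hedged sketch of just that pattern (class and method names are illustrative):

import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MetaData;
import org.apache.sysml.runtime.matrix.MetaDataFormat;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class OutputMetaSketch {
    // describes a MatrixObject's meta data, or returns null if none is attached
    public static String describe(MatrixObject mo) {
        MetaData md = mo.getMetaData();
        if (!(md instanceof MetaDataFormat))
            return null;
        MetaDataFormat meta = (MetaDataFormat) md;
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
        OutputInfo outinfo = meta.getOutputInfo();
        return mc.getRows() + " x " + mc.getCols()
            + " in " + mc.getRowsPerBlock() + " x " + mc.getColsPerBlock() + " blocks, "
            + OutputInfo.outputInfoToString(outinfo);
    }
}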
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MLContextConversionUtil method doubleMatrixToMatrixObject.
/**
* Convert a two-dimensional double array to a {@code MatrixObject}.
*
* @param variableName
* name of the variable associated with the matrix
* @param doubleMatrix
* matrix of double values
* @param matrixMetadata
* the matrix metadata
* @return the two-dimensional double matrix converted to a
* {@code MatrixObject}
*/
public static MatrixObject doubleMatrixToMatrixObject(String variableName, double[][] doubleMatrix, MatrixMetadata matrixMetadata) {
    try {
        MatrixBlock matrixBlock = DataConverter.convertToMatrixBlock(doubleMatrix);
        MatrixCharacteristics mc = (matrixMetadata != null)
            ? matrixMetadata.asMatrixCharacteristics()
            : new MatrixCharacteristics(matrixBlock.getNumRows(), matrixBlock.getNumColumns(),
                ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
            new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
        matrixObject.acquireModify(matrixBlock);
        matrixObject.release();
        return matrixObject;
    }
    catch (DMLRuntimeException e) {
        throw new MLContextException("Exception converting double[][] array to MatrixObject", e);
    }
}
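A hedged usage sketch of the conversion above; with a null MatrixMetadata, the characteristics are derived from the array as the method shows (values and class name are illustrative):

import org.apache.sysml.api.mlcontext.MLContextConversionUtil;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;

public class ConversionSketch {
    public static void main(String[] args) {
        double[][] data = { { 1.0, 2.0 }, { 3.0, 4.0 } };
        // null metadata: dimensions and default block sizes are inferred
        MatrixObject mo = MLContextConversionUtil.doubleMatrixToMatrixObject("X", data, null);
        MatrixBlock mb = mo.acquireRead(); // pins the in-memory block
        System.out.println(mb.getNumRows() + " x " + mb.getNumColumns()); // 2 x 2
        mo.release();
    }
}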