Search in sources :

Example 26 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class ExecutionContext method setMetaData.

/**
 * Replaces the metadata of the matrix bound to {@code varName} so that it
 * records the given dimensions, keeping the existing block sizes and the
 * existing input/output format information.
 *
 * <p>Nothing is changed when the matrix already reports exactly
 * {@code nrows} x {@code ncols}.
 *
 * @param varName name of the matrix variable to look up
 * @param nrows   new number of rows
 * @param ncols   new number of columns
 * @throws DMLRuntimeException if the matrix has no metadata, or its metadata
 *         is not a {@code MetaDataFormat}
 */
public void setMetaData(String varName, long nrows, long ncols) {
    MatrixObject matrix = getMatrixObject(varName);
    boolean dimsUnchanged = matrix.getNumRows() == nrows && matrix.getNumColumns() == ncols;
    if (dimsUnchanged) {
        return;
    }

    MetaData current = matrix.getMetaData();
    // instanceof evaluates to false for null, so this also rejects missing metadata
    if (!(current instanceof MetaDataFormat)) {
        throw new DMLRuntimeException("Metadata not available");
    }
    MetaDataFormat currentFormat = (MetaDataFormat) current;

    // carry over the block sizes of the existing matrix, only the dimensions change
    int rowsPerBlock = (int) matrix.getNumRowsPerBlock();
    int colsPerBlock = (int) matrix.getNumColumnsPerBlock();
    MatrixCharacteristics updatedDims = new MatrixCharacteristics(nrows, ncols, rowsPerBlock, colsPerBlock);

    matrix.setMetaData(new MetaDataFormat(updatedDims, currentFormat.getOutputInfo(), currentFormat.getInputInfo()));
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MetaData(org.apache.sysml.runtime.matrix.MetaData) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 27 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class ExternalFunctionInvocationInstruction method createOutputMatrixObject.

/**
 * Returns a {@code MatrixObject} for the given external-function matrix.
 *
 * <p>If the matrix already carries a matrix object, that object is used.
 * Otherwise a new DOUBLE matrix object is built from the matrix dimensions,
 * the configured block size and the file path of {@code m}. In both cases a
 * file name equal to {@code Matrix.DEFAULT_FILENAME} is replaced by a
 * generated default output path.
 *
 * @param m matrix produced by the external function
 * @return the existing or newly created matrix object
 */
private MatrixObject createOutputMatrixObject(Matrix m) {
    MatrixObject result = m.getMatrixObject();

    if (result == null) {
        // no matrix object attached: wrap the in-memory matrix so it can be
        // handed back to the invoking program
        MatrixCharacteristics dims = new MatrixCharacteristics(m.getNumRows(), m.getNumCols(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        MetaDataFormat format = new MetaDataFormat(dims, InputInfo.getMatchingOutputInfo(iinfo), iinfo);
        result = new MatrixObject(Expression.ValueType.DOUBLE, m.getFilePath(), format);
    }

    // in-memory matrices may come without a real file name; assign a generated one
    boolean hasDefaultName = result.getFileName().equals(Matrix.DEFAULT_FILENAME);
    if (hasDefaultName) {
        result.setFileName(createDefaultOutputFilePathAndName());
    }

    return result;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 28 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class FrameEvictionTest method runFrameEvictionTest.

/**
 * Writes a randomly generated frame into the buffer pool, clears the
 * in-memory cache reference, reads the frame back and compares it with the
 * original.
 *
 * @param schema      value types of the frame columns; its length is used as
 *                    the number of columns
 * @param sparse      if {@code true} the data is generated with
 *                    {@code sparsity2}, otherwise with {@code sparsity1}
 * @param defaultMeta if {@code false}, column names of the form
 *                    {@code Custom_name_<i>} are set on the frame
 * @param force       if {@code true}, {@code LazyWriteBuffer.forceEviction()}
 *                    is called before the cache is cleared
 */
private void runFrameEvictionTest(ValueType[] schema, boolean sparse, boolean defaultMeta, boolean force) {
    try {
        final int numCols = schema.length;

        // generate the input data
        double density;
        if (sparse) {
            density = sparsity2;
        } else {
            density = sparsity1;
        }
        double[][] data = getRandomMatrix(rows, numCols, -10, 10, density, 765);
        MatrixBlock dataBlock = DataConverter.convertToMatrixBlock(data);
        FrameBlock original = DataConverter.convertToFrameBlock(dataBlock, schema);

        // optionally replace the default column names
        if (!defaultMeta) {
            String[] names = new String[numCols];
            for (int col = 0; col < numCols; col++) {
                names[col] = "Custom_name_" + col;
            }
            original.setColumnNames(names);
        }

        // initialize the cache
        CacheableData.initCaching("tmp_frame_eviction_test");

        // wrap the frame block in a frame object
        MatrixCharacteristics dims = new MatrixCharacteristics(rows, numCols, -1, -1, -1);
        MetaDataFormat format = new MetaDataFormat(dims, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        FrameObject frame = new FrameObject("fA", format, schema);
        frame.acquireModify(original);
        frame.release();

        // evict (if requested) and drop the in-memory reference
        if (force) {
            LazyWriteBuffer.forceEviction();
        }
        // clearCache is not publicly accessible, so it is invoked reflectively
        Method clearCache = CacheableData.class.getDeclaredMethod("clearCache");
        clearCache.setAccessible(true);
        clearCache.invoke(frame);

        // read the frame back through the buffer pool (if forced, this is a
        // read from disk, otherwise deserialization or a simple reference
        // depending on the schema)
        FrameBlock restored = frame.acquireRead();
        frame.release();

        // both frames must hold the same content
        String[][] expected = DataConverter.convertToStringFrame(original);
        String[][] actual = DataConverter.convertToStringFrame(restored);
        TestUtils.compareFrames(expected, actual, rows, numCols);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) Method(java.lang.reflect.Method) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Example 29 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class RunMRJobs method submitJob.

/**
 * Submits an MR job instruction, without modifying any state of that instruction.
 *
 * <p>The job type of the instruction selects the MR job implementation to run.
 * DATAGEN and REBLOCK/CSV_REBLOCK jobs may instead be executed in memory when
 * dynamic recompilation is enabled, the runtime platform is not HADOOP and the
 * recompiler accepts the instruction. For jobs that were actually run as MR
 * jobs, empty outputs are replaced by an explicitly written empty matrix and
 * (except for CSV_WRITE) a metadata file is written for every output.
 *
 * @param inst instruction
 * @return job status
 * @throws DMLRuntimeException if the job type is invalid, if any exception is
 *         raised while running the job or writing outputs, or if the return
 *         status check of the job fails
 */
public static JobReturn submitJob(MRJobInstruction inst) {
    JobReturn ret = new JobReturn();
    MatrixObject[] inputMatrices = inst.getInputMatrices();
    MatrixObject[] outputMatrices = inst.getOutputMatrices();
    // set to true when the job was executed in memory instead of as an MR job
    boolean execCP = false;
    // Spawn MapReduce Jobs
    try {
        // replace all placeholders in all instructions with appropriate values
        String rdInst = inst.getIv_randInstructions();
        String rrInst = inst.getIv_recordReaderInstructions();
        String mapInst = inst.getIv_instructionsInMapper();
        String shuffleInst = inst.getIv_shuffleInstructions();
        String aggInst = inst.getIv_aggInstructions();
        String otherInst = inst.getIv_otherInstructions();
        boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
        switch(inst.getJobType()) {
            case GMR:
            case GMRCELL:
                ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), inst.getPartitioned(), inst.getPformats(), inst.getPsizes(), rrInst, mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATAGEN:
                if (ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPDataGen(inst, rdInst)) {
                    // in-memory execution: undo the MR job count for this instruction
                    ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR), mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                }
                break;
            case CM_COV:
                ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case GROUPED_AGG:
                ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case REBLOCK:
            case CSV_REBLOCK:
                if (ConfigurationManager.isDynamicRecompilation() && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPReblock(inst, inputMatrices)) {
                    // in-memory execution: undo the MR job count for this instruction
                    ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    // export dirty matrices to HDFS (initially deferred)
                    for (MatrixObject m : inputMatrices) {
                        if (m.isDirty())
                            m.exportData();
                    }
                    checkEmptyInputs(inst, inputMatrices);
                    if (inst.getJobType() == JobType.REBLOCK) {
                        ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), getNNZ(inputMatrices), mapInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    } else if (inst.getJobType() == JobType.CSV_REBLOCK) {
                        ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                }
                break;
            case CSV_WRITE:
                // pass row block sizes followed by column block sizes, like every other job type here
                ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs());
                break;
            case MMCJ:
                ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0]);
                break;
            case MMRJ:
                ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case SORT:
                // the weights flag is set only when there are no mapper instructions
                boolean weightsflag = mapInst.equalsIgnoreCase("");
                ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0], inst.getRlens()[0], inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0], mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0], weightsflag);
                break;
            case COMBINE:
                ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATA_PARTITION:
                ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst, inst.getIv_resultIndices(), outputMatrices, inst.getIv_numReducers(), inst.getIv_replication());
                break;
            default:
                throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
        }
    }// end of try block
     catch (Exception e) {
        throw new DMLRuntimeException(e);
    }
    if (ret.checkReturnStatus()) {
        /*
         * Check if any output is empty. If yes, create a dummy file. Needs
         * to be done only in case of (1) CellOutputInfo and if not CP, or
         * (2) BinaryBlockOutputInfo if not CP and output empty blocks disabled.
         */
        try {
            if (!execCP) {
                for (int i = 0; i < outputMatrices.length; i++) {
                    // get output meta data
                    MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    OutputInfo outinfo = meta.getOutputInfo();
                    String fname = outputMatrices[i].getFileName();
                    if (MapReduceTool.isHDFSFileEmpty(fname)) {
                        // prepare output file
                        Path filepath = new Path(fname, "0-m-00000");
                        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
                        writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock());
                    }
                    outputMatrices[i].setHDFSFileExists(true);
                    if (inst.getJobType() != JobType.CSV_WRITE) {
                        // write out metadata file
                        // Currently, valueType information is not stored in MR instruction,
                        // since only DOUBLE matrices are supported ==> hard coded the value type information for now
                        MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
                    }
                }
            }
            return ret;
        } catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    // should not come here!
    // (only reached when ret.checkReturnStatus() returned false)
    throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}
Also used : Path(org.apache.hadoop.fs.Path) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) IOException(java.io.IOException) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixWriter(org.apache.sysml.runtime.io.MatrixWriter)

Example 30 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MLContextConversionUtil method doubleMatrixToMatrixObject.

/**
 * Builds a {@code MatrixObject} holding the values of a two-dimensional
 * double array.
 *
 * <p>The matrix characteristics are taken from {@code matrixMetadata} when it
 * is given; otherwise they are derived from the dimensions of the converted
 * matrix block and the configured block size. The matrix object is created
 * with binary-block input/output format and a unique temporary file name.
 *
 * @param variableName
 *            name of the variable associated with the matrix (not read by
 *            this method)
 * @param doubleMatrix
 *            matrix of double values
 * @param matrixMetadata
 *            the matrix metadata, may be {@code null}
 * @return the two-dimensional double matrix converted to a
 *         {@code MatrixObject}
 * @throws MLContextException
 *             if a {@code DMLRuntimeException} occurs during the conversion
 */
public static MatrixObject doubleMatrixToMatrixObject(String variableName, double[][] doubleMatrix, MatrixMetadata matrixMetadata) {
    try {
        MatrixBlock block = DataConverter.convertToMatrixBlock(doubleMatrix);

        MatrixCharacteristics dims;
        if (matrixMetadata == null) {
            // no metadata supplied: fall back to the block's own dimensions and the default block size
            dims = new MatrixCharacteristics(block.getNumRows(), block.getNumColumns(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        } else {
            dims = matrixMetadata.asMatrixCharacteristics();
        }

        String tempFileName = OptimizerUtils.getUniqueTempFileName();
        MetaDataFormat format = new MetaDataFormat(dims, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        MatrixObject result = new MatrixObject(ValueType.DOUBLE, tempFileName, format);

        // hand the block over to the matrix object and release the acquired handle again
        result.acquireModify(block);
        result.release();
        return result;
    } catch (DMLRuntimeException e) {
        throw new MLContextException("Exception converting double[][] array to MatrixObject", e);
    }
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)54 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)47 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)28 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)26 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)17 IOException (java.io.IOException)12 ValueType (org.apache.sysml.parser.Expression.ValueType)10 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)10 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)9 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)7 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)5 Path (org.apache.hadoop.fs.Path)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Text (org.apache.hadoop.io.Text)4 Data (org.apache.sysml.runtime.instructions.cp.Data)4 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)4 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)4 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)4 DataOp (org.apache.sysml.hops.DataOp)3