Search in sources :

Example 51 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class TransformEncodeDecodeTest method runTransformEncodeDecodeTest.

/**
 * Writes a randomly generated frame in the requested format, runs the
 * {@code TEST_NAME1} DML script on it, and then verifies the script output:
 * for each output row, the number in column 0 must equal the count that
 * {@code getCounts(FA, 1)} holds for the key in column 1.
 *
 * @param et execution type (not referenced inside this method)
 * @param sparse if true, the input is generated with {@code sparsity2}, otherwise {@code sparsity1}
 * @param fmt external format name used for both writing the input and reading the output
 */
private void runTransformEncodeDecodeTest(ExecType et, boolean sparse, String fmt) {
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    // only CP supported
    rtplatform = RUNTIME_PLATFORM.HYBRID;
    try {
        getAndLoadTestConfiguration(TEST_NAME1);

        // resolve the reader-side and the matching writer-side format descriptors
        InputInfo readFormat = InputInfo.stringExternalToInputInfo(fmt);
        OutputInfo writeFormat = InputInfo.getMatchingOutputInfo(readFormat);

        // generate the input data and persist it as a frame
        double[][] data = TestUtils.round(getRandomMatrix(rows, cols, 1, 15, sparse ? sparsity2 : sparsity1, 7));
        FrameBlock inputFrame = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(data));
        FrameWriterFactory.createFrameWriter(writeFormat).writeFrameToHDFS(inputFrame, input("F"), rows, cols);

        fullDMLScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", input("F"), fmt, String.valueOf(rows), String.valueOf(cols), SCRIPT_DIR + TEST_DIR + SPEC, output("FO") };

        // execute the script
        runTest(true, false, null, -1);

        // read the script output and compare against counts derived from the input
        FrameBlock outputFrame = FrameReaderFactory.createFrameReader(readFormat).readFrameFromHDFS(output("FO"), 16, 2);
        HashMap<String, Long> inputCounts = getCounts(inputFrame, 1);
        for (Iterator<String[]> rowIter = outputFrame.getStringRowIterator(); rowIter.hasNext(); ) {
            String[] row = rowIter.next();
            Double expected = (double) inputCounts.get(row[1]);
            // kept as boxed Double so the Object-based assertEquals overload is used
            Double val;
            if (row[0] == null) {
                val = 0d;
            } else {
                val = Double.valueOf(row[0]);
            }
            Assert.assertEquals("Output aggregates don't match: " + expected + " vs " + val, expected, val);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    } finally {
        // always restore the platform that was active before the test
        rtplatform = savedPlatform;
    }
}
Also used : FrameWriter(org.apache.sysml.runtime.io.FrameWriter) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 52 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class RunMRJobs method submitJob.

/**
 * Submits an MR job instruction, without modifying any state of that instruction.
 * <p>
 * The job is dispatched on {@code inst.getJobType()}. DATAGEN and (CSV_)REBLOCK jobs
 * may be executed in memory instead of as MR jobs when the recompiler checks allow it.
 * After a successful MR execution, empty outputs are replaced by an empty matrix file
 * and (except for CSV_WRITE) a metadata file is written next to each output.
 *
 * @param inst instruction
 * @return job status
 * @throws DMLRuntimeException if the job type is invalid, if job execution or the
 *         output post-processing throws, or if the job's return status check fails
 */
public static JobReturn submitJob(MRJobInstruction inst) {
    JobReturn ret = new JobReturn();
    MatrixObject[] inputMatrices = inst.getInputMatrices();
    MatrixObject[] outputMatrices = inst.getOutputMatrices();
    // set when the job was executed in memory instead of as an MR job
    boolean execCP = false;
    // Spawn MapReduce Jobs
    try {
        // replace all placeholders in all instructions with appropriate values
        String rdInst = inst.getIv_randInstructions();
        String rrInst = inst.getIv_recordReaderInstructions();
        String mapInst = inst.getIv_instructionsInMapper();
        String shuffleInst = inst.getIv_shuffleInstructions();
        String aggInst = inst.getIv_aggInstructions();
        String otherInst = inst.getIv_otherInstructions();
        boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
        switch(inst.getJobType()) {
            case GMR:
            case GMRCELL:
                ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), inst.getPartitioned(), inst.getPformats(), inst.getPsizes(), rrInst, mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATAGEN:
                if (ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPDataGen(inst, rdInst)) {
                    // in-memory execution: undo the MR job count for this instruction
                    ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR), mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                }
                break;
            case CM_COV:
                ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case GROUPED_AGG:
                ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case REBLOCK:
            case CSV_REBLOCK:
                if (ConfigurationManager.isDynamicRecompilation() && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPReblock(inst, inputMatrices)) {
                    // in-memory execution: undo the MR job count for this instruction
                    ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    // export dirty matrices to HDFS (initially deferred)
                    for (MatrixObject m : inputMatrices) {
                        if (m.isDirty()) {
                            m.exportData();
                        }
                    }
                    checkEmptyInputs(inst, inputMatrices);
                    if (inst.getJobType() == JobType.REBLOCK) {
                        ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), getNNZ(inputMatrices), mapInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    } else if (inst.getJobType() == JobType.CSV_REBLOCK) {
                        ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                }
                break;
            case CSV_WRITE:
                // pass row-block sizes followed by column-block sizes, as for all other job types
                // (previously getBclens() was passed for both arguments)
                ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs());
                break;
            case MMCJ:
                ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0]);
                break;
            case MMRJ:
                ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case SORT:
                // the weights flag is set only when there are no mapper instructions
                boolean weightsflag = mapInst.equalsIgnoreCase("");
                ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0], inst.getRlens()[0], inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0], mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0], weightsflag);
                break;
            case COMBINE:
                ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATA_PARTITION:
                ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst, inst.getIv_resultIndices(), outputMatrices, inst.getIv_numReducers(), inst.getIv_replication());
                break;
            default:
                throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
        }
    }// end of try block
     catch (Exception e) {
        throw new DMLRuntimeException(e);
    }
    if (ret.checkReturnStatus()) {
        /*
         * Check if any output is empty. If yes, create a dummy file. Needs
         * to be done only in case of (1) CellOutputInfo and if not CP, or
         * (2) BinaryBlockOutputInfo if not CP and output empty blocks disabled.
         */
        try {
            if (!execCP) {
                for (int i = 0; i < outputMatrices.length; i++) {
                    // get output meta data
                    MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    OutputInfo outinfo = meta.getOutputInfo();
                    String fname = outputMatrices[i].getFileName();
                    if (MapReduceTool.isHDFSFileEmpty(fname)) {
                        // prepare output file
                        Path filepath = new Path(fname, "0-m-00000");
                        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
                        writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock());
                    }
                    outputMatrices[i].setHDFSFileExists(true);
                    if (inst.getJobType() != JobType.CSV_WRITE) {
                        // write out metadata file
                        // Currently, valueType information is not stored in MR instruction,
                        // since only DOUBLE matrices are supported ==> hard coded the value type information for now
                        MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
                    }
                }
            }
            return ret;
        } catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    // should not come here! Reached only when ret.checkReturnStatus() returned false.
    throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}
Also used : Path(org.apache.hadoop.fs.Path) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) IOException(java.io.IOException) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixWriter(org.apache.sysml.runtime.io.MatrixWriter)

Example 53 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class VariableCPInstruction method writeMMFile.

/**
 * Helper function to write MM files to HDFS.
 * <p>
 * A dirty matrix object is exported directly. Otherwise the action depends on the
 * format recorded in the object's metadata: text-cell files are merged into a
 * MatrixMarket file, binary-block data is exported, and any other format is rejected.
 *
 * @param ec execution context
 * @param fname file name
 */
private void writeMMFile(ExecutionContext ec, String fname) {
    final String mmFormat = "matrixmarket";
    MatrixObject matrix = ec.getMatrixObject(getInput1().getName());

    if (matrix.isDirty()) {
        // there exist data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        matrix.exportData(fname, mmFormat);
        return;
    }

    OutputInfo storedFormat = ((MetaDataFormat) matrix.getMetaData()).getOutputInfo();
    MatrixCharacteristics dims = matrix.getMatrixCharacteristics();

    if (storedFormat == OutputInfo.BinaryBlockOutputInfo) {
        matrix.exportData(fname, mmFormat);
        return;
    }
    if (storedFormat != OutputInfo.TextCellOutputInfo) {
        throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(storedFormat) + "): can not export into MatrixMarket format.");
    }

    // text cell: merge the existing file(s) into a single MatrixMarket file
    try {
        WriterMatrixMarket.mergeTextcellToMatrixMarket(matrix.getFileName(), fname, dims.getRows(), dims.getCols(), dims.getNonZeros());
    } catch (IOException ioe) {
        throw new DMLRuntimeException(ioe);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 54 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class VariableCPInstruction method writeCSVFile.

/**
 * Helper function to write CSV files to HDFS.
 * <p>
 * A dirty matrix object is exported directly. Otherwise, data already stored as CSV
 * is handed to {@code WriterTextCSV.addHeaderToCSV}, binary-block and text-cell data
 * is exported as CSV, and any other format is rejected. On the non-dirty path a
 * metadata file {@code fname + ".mtd"} is written afterwards.
 *
 * @param ec execution context
 * @param fname file name
 */
private void writeCSVFile(ExecutionContext ec, String fname) {
    final String csvFormat = "csv";
    MatrixObject matrix = ec.getMatrixObject(getInput1().getName());

    if (matrix.isDirty()) {
        // there exist data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        matrix.exportData(fname, csvFormat, _formatProperties);
        return;
    }

    try {
        OutputInfo storedFormat = ((MetaDataFormat) matrix.getMetaData()).getOutputInfo();
        MatrixCharacteristics dims = ((MetaDataFormat) matrix.getMetaData()).getMatrixCharacteristics();

        boolean alreadyCsv = (storedFormat == OutputInfo.CSVOutputInfo);
        boolean convertible = (storedFormat == OutputInfo.BinaryBlockOutputInfo || storedFormat == OutputInfo.TextCellOutputInfo);
        if (!alreadyCsv && !convertible) {
            throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(storedFormat) + "): can not export into CSV format.");
        }

        if (alreadyCsv) {
            WriterTextCSV csvWriter = new WriterTextCSV((CSVFileFormatProperties) _formatProperties);
            csvWriter.addHeaderToCSV(matrix.getFileName(), fname, dims.getRows(), dims.getCols());
        } else {
            matrix.exportData(fname, csvFormat, _formatProperties);
        }

        // Write Metadata file
        MapReduceTool.writeMetaDataFile(fname + ".mtd", matrix.getValueType(), dims, OutputInfo.CSVOutputInfo, _formatProperties);
    } catch (IOException ioe) {
        throw new DMLRuntimeException(ioe);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) WriterTextCSV(org.apache.sysml.runtime.io.WriterTextCSV) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 55 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class FrameObject method writeBlobFromRDDtoHDFS.

/**
 * Writes the given frame RDD to {@code fname}.
 * <p>
 * The output format is parsed from {@code ofmt} when it is non-null; otherwise it is
 * the output info matching the input info stored in this object's metadata.
 *
 * @param rdd RDD object to write
 * @param fname target file name
 * @param ofmt output format name, or null to derive the format from the metadata
 * @throws IOException declared by this method's signature
 * @throws DMLRuntimeException declared by this method's signature
 */
@Override
protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String ofmt) throws IOException, DMLRuntimeException {
    // the metadata cast is performed up front, regardless of whether ofmt is given
    MetaDataFormat formatMeta = (MetaDataFormat) _metaData;

    OutputInfo targetFormat;
    if (ofmt == null) {
        targetFormat = InputInfo.getMatchingOutputInfo(formatMeta.getInputInfo());
    } else {
        targetFormat = OutputInfo.stringToOutputInfo(ofmt);
    }

    // note: the write of an RDD to HDFS might trigger
    // lazy evaluation of pending transformations.
    SparkExecutionContext.writeFrameRDDtoHDFS(rdd, fname, targetFormat);
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat)

Aggregations

OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)69 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)34 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)30 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)28 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)25 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)22 IOException (java.io.IOException)16 ValueType (org.apache.sysml.parser.Expression.ValueType)10 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)10 HashMap (java.util.HashMap)6 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)6 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)5 Matrix (org.apache.sysml.udf.Matrix)5 Scalar (org.apache.sysml.udf.Scalar)5 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 JobConf (org.apache.hadoop.mapred.JobConf)4 RunningJob (org.apache.hadoop.mapred.RunningJob)4 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)4 LopsException (org.apache.sysml.lops.LopsException)4