Search in sources :

Example 6 with JobReturn

use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.

the class MergedMRJobInstruction method constructJobReturn.

/**
 * Builds a partial job return for one instruction id by copying its slice of
 * the output meta data out of a combined job return.
 *
 * @param instID id used to look up the output offset and length
 * @param retAll combined job return to take the success flag and meta data from
 * @return new job return with the success flag of {@code retAll} and, if the
 *         job was successful, the selected range of its meta data
 */
public JobReturn constructJobReturn(long instID, JobReturn retAll) {
    // start position and number of outputs registered for this id
    int start = outIxOffs.get(instID);
    int count = outIxLens.get(instID);
    JobReturn partial = new JobReturn();
    partial.successful = retAll.successful;
    if (!partial.successful) {
        // meta data is only populated for successful jobs
        return partial;
    }
    // copy the [start, start+count) range of the combined meta data
    MetaData[] slice = new MetaData[count];
    System.arraycopy(retAll.metadata, start, slice, 0, count);
    partial.metadata = slice;
    return partial;
}
Also used : JobReturn(org.apache.sysml.runtime.matrix.JobReturn)

Example 7 with JobReturn

use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.

the class RunMRJobs method executeInMemoryReblockOperations.

/**
 * Executes the reblock instructions of a job directly on in-memory matrix
 * blocks instead of running them as a job.
 *
 * For every parsed reblock instruction the input block is read and handed to
 * each output matrix whose result index equals the instruction's output index.
 *
 * @param inst job instruction providing result indexes and output infos
 * @param shuffleInst instruction string parsed into reblock instructions
 * @param inputMatrices input matrices, indexed by the instructions' input index
 * @param outputMatrices output matrices, positionally aligned with the result indexes
 * @return job return flagged as successful, holding the characteristics of the written outputs
 */
private static JobReturn executeInMemoryReblockOperations(MRJobInstruction inst, String shuffleInst, MatrixObject[] inputMatrices, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] outStats = new MatrixCharacteristics[outputMatrices.length];
    ReblockInstruction[] reblocks = MRInstructionParser.parseReblockInstructions(shuffleInst);
    byte[] resultIndexes = inst.getIv_resultIndices();
    for (ReblockInstruction reblock : reblocks) {
        // CP Reblock through caching framework (no copy required: same data, next op copies)
        MatrixObject source = inputMatrices[reblock.input];
        MatrixBlock block = source.acquireRead();
        for (int pos = 0; pos < resultIndexes.length; pos++) {
            if (reblock.output != resultIndexes[pos]) {
                // this output is not produced by the current instruction
                continue;
            }
            MatrixObject target = outputMatrices[pos];
            target.acquireModify(block);
            target.release();
            outStats[pos] = new MatrixCharacteristics(block.getNumRows(), block.getNumColumns(), reblock.brlen, reblock.bclen, block.getNonZeros());
        }
        source.release();
    }
    return new JobReturn(outStats, inst.getOutputInfos(), true);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 8 with JobReturn

use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.

the class RunMRJobs method prepareAndSubmitJob.

/**
 * Wrapper for submitting MR job instructions incl preparation and actual submission.
 * The preparation includes (1) pulling stats out of symbol table and populating the
 * instruction, (2) instruction patching, and (3) export of in-memory matrices if
 * required.
 *
 * Furthermore, this wrapper also provides a hook for runtime piggybacking to intercept
 * concurrent job submissions in order to collect and merge instructions.
 *
 * The instruction strings of {@code inst} are patched only for the duration of the
 * submission; the original strings are restored before this method returns or throws.
 *
 * @param inst instruction
 * @param ec execution context
 * @return job status
 */
public static JobReturn prepareAndSubmitJob(MRJobInstruction inst, ExecutionContext ec) {
    // Obtain references to all input matrices
    MatrixObject[] inputMatrices = inst.extractInputMatrices(ec);
    // note: for REBLOCK postponed until we know if necessary
    if (!(inst.getJobType() == JobType.REBLOCK)) {
        // export matrices
        for (MatrixObject m : inputMatrices) {
            if (m.isDirty() || m.getRDDHandle() != null) {
                m.exportData();
            }
        }
        // check input files
        checkEmptyInputs(inst, inputMatrices);
    }
    // Obtain references to all output matrices
    inst.extractOutputMatrices(ec);
    // obtain original state
    String rdInst = inst.getIv_randInstructions();
    String rrInst = inst.getIv_recordReaderInstructions();
    String mapInst = inst.getIv_instructionsInMapper();
    String shuffleInst = inst.getIv_shuffleInstructions();
    String aggInst = inst.getIv_aggInstructions();
    String otherInst = inst.getIv_otherInstructions();
    JobReturn ret = null;
    try {
        // variable patching (replace placeholders with variables)
        inst.setIv_randInstructions(updateLabels(rdInst, ec.getVariables()));
        inst.setIv_recordReaderInstructions(updateLabels(rrInst, ec.getVariables()));
        inst.setIv_instructionsInMapper(updateLabels(mapInst, ec.getVariables()));
        inst.setIv_shuffleInstructions(updateLabels(shuffleInst, ec.getVariables()));
        inst.setIv_aggInstructions(updateLabels(aggInst, ec.getVariables()));
        inst.setIv_otherInstructions(updateLabels(otherInst, ec.getVariables()));
        // runtime piggybacking if applicable
        if (OptimizerUtils.ALLOW_RUNTIME_PIGGYBACKING && RuntimePiggybacking.isActive() && RuntimePiggybacking.isSupportedJobType(inst.getJobType())) {
            ret = RuntimePiggybacking.submitJob(inst);
        } else {
            ret = submitJob(inst);
        }
    } finally {
        // reset original state, also on failure, so that a failed patching or
        // submission does not leave the instruction with patched strings
        inst.setIv_randInstructions(rdInst);
        inst.setIv_recordReaderInstructions(rrInst);
        inst.setIv_instructionsInMapper(mapInst);
        inst.setIv_shuffleInstructions(shuffleInst);
        inst.setIv_aggInstructions(aggInst);
        inst.setIv_otherInstructions(otherInst);
    }
    return ret;
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) JobReturn(org.apache.sysml.runtime.matrix.JobReturn)

Example 9 with JobReturn

use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.

the class RunMRJobs method executeInMemoryDataGenOperations.

/**
 * Executes the data generation instructions of a job directly in memory
 * instead of running them as a job.
 *
 * Rand and seq instructions are supported; instructions of any other type are
 * skipped. The generated block is handed to each output matrix whose result
 * index equals the instruction's output index.
 *
 * @param inst job instruction providing result indexes and output infos
 * @param randInst instruction string parsed into data generation instructions
 * @param outputMatrices output matrices, positionally aligned with the result indexes
 * @return job return flagged as successful, holding the characteristics of the written outputs
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] outStats = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] generators = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIndexes = inst.getIv_resultIndices();
    for (DataGenMRInstruction generator : generators) {
        if (generator instanceof RandInstruction) {
            // CP Rand block operation
            RandInstruction rand = (RandInstruction) generator;
            RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(rand.getProbabilityDensityFunction(), (int) rand.getRows(), (int) rand.getCols(), rand.getRowsInBlock(), rand.getColsInBlock(), rand.getSparsity(), rand.getMinValue(), rand.getMaxValue(), rand.getPdfParams());
            MatrixBlock block = MatrixBlock.randOperations(rgen, rand.getSeed());
            for (int pos = 0; pos < resultIndexes.length; pos++) {
                if (rand.output != resultIndexes[pos]) {
                    continue;
                }
                MatrixObject target = outputMatrices[pos];
                target.acquireModify(block);
                target.release();
                outStats[pos] = new MatrixCharacteristics(block.getNumRows(), block.getNumColumns(), rand.getRowsInBlock(), rand.getColsInBlock(), block.getNonZeros());
            }
        } else if (generator instanceof SeqInstruction) {
            // CP Seq block operation
            SeqInstruction seq = (SeqInstruction) generator;
            MatrixBlock block = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);
            for (int pos = 0; pos < resultIndexes.length; pos++) {
                if (seq.output != resultIndexes[pos]) {
                    continue;
                }
                MatrixObject target = outputMatrices[pos];
                target.acquireModify(block);
                target.release();
                outStats[pos] = new MatrixCharacteristics(block.getNumRows(), block.getNumColumns(), seq.getRowsInBlock(), seq.getColsInBlock(), block.getNonZeros());
            }
        }
    }
    return new JobReturn(outStats, inst.getOutputInfos(), true);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SeqInstruction(org.apache.sysml.runtime.instructions.mr.SeqInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 10 with JobReturn

use of org.apache.sysml.runtime.matrix.JobReturn in project incubator-systemml by apache.

the class RunMRJobs method submitJob.

/**
 * Submits an MR job instruction, without modifying any state of that instruction.
 *
 * The job type of the instruction selects the job implementation to run. DATAGEN
 * and (CSV_)REBLOCK jobs are executed in memory instead when the respective
 * recompiler check allows it. After a successful non-in-memory run, an empty
 * matrix file is written for every empty output and, except for CSV_WRITE, a
 * metadata file is written next to each output.
 *
 * @param inst instruction
 * @return job status
 * @throws DMLRuntimeException if the job type is invalid, the job execution raises
 *         any exception, or writing the output/metadata files fails
 */
public static JobReturn submitJob(MRJobInstruction inst) {
    JobReturn ret = new JobReturn();
    MatrixObject[] inputMatrices = inst.getInputMatrices();
    MatrixObject[] outputMatrices = inst.getOutputMatrices();
    // set if the job was executed in memory, which skips the output file handling below
    boolean execCP = false;
    // Spawn MapReduce Jobs
    try {
        // replace all placeholders in all instructions with appropriate values
        String rdInst = inst.getIv_randInstructions();
        String rrInst = inst.getIv_recordReaderInstructions();
        String mapInst = inst.getIv_instructionsInMapper();
        String shuffleInst = inst.getIv_shuffleInstructions();
        String aggInst = inst.getIv_aggInstructions();
        String otherInst = inst.getIv_otherInstructions();
        boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
        switch(inst.getJobType()) {
            case GMR:
            case GMRCELL:
                ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), inst.getPartitioned(), inst.getPformats(), inst.getPsizes(), rrInst, mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATAGEN:
                if (ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPDataGen(inst, rdInst)) {
                    ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR), mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                }
                break;
            case CM_COV:
                ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case GROUPED_AGG:
                ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case REBLOCK:
            case CSV_REBLOCK:
                if (ConfigurationManager.isDynamicRecompilation() && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPReblock(inst, inputMatrices)) {
                    ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    // export dirty matrices to HDFS (initially deferred)
                    for (MatrixObject m : inputMatrices) {
                        if (m.isDirty()) {
                            m.exportData();
                        }
                    }
                    checkEmptyInputs(inst, inputMatrices);
                    if (inst.getJobType() == JobType.REBLOCK) {
                        ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), getNNZ(inputMatrices), mapInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    } else if (inst.getJobType() == JobType.CSV_REBLOCK) {
                        ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                }
                break;
            case CSV_WRITE:
                // pass row block sizes followed by column block sizes, as for all other
                // jobs in this switch (previously getBclens() was passed for both)
                ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs());
                break;
            case MMCJ:
                ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0]);
                break;
            case MMRJ:
                ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case SORT:
                // the weights flag is set only if there are no mapper instructions
                boolean weightsflag = true;
                if (!mapInst.equalsIgnoreCase("")) {
                    weightsflag = false;
                }
                ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0], inst.getRlens()[0], inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0], mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0], weightsflag);
                break;
            case COMBINE:
                ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATA_PARTITION:
                ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst, inst.getIv_resultIndices(), outputMatrices, inst.getIv_numReducers(), inst.getIv_replication());
                break;
            default:
                throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
        }
    }// end of try block
     catch (Exception e) {
        // any failure, including the invalid job type above, surfaces as DMLRuntimeException
        throw new DMLRuntimeException(e);
    }
    if (ret.checkReturnStatus()) {
        /*
         * Check if any output is empty. If yes, create a dummy file. Needs
         * to be done only in case of (1) CellOutputInfo and if not CP, or
         * (2) BinaryBlockOutputInfo if not CP and output empty blocks disabled.
         */
        try {
            if (!execCP) {
                for (int i = 0; i < outputMatrices.length; i++) {
                    // get output meta data
                    MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    OutputInfo outinfo = meta.getOutputInfo();
                    String fname = outputMatrices[i].getFileName();
                    if (MapReduceTool.isHDFSFileEmpty(fname)) {
                        // prepare output file
                        Path filepath = new Path(fname, "0-m-00000");
                        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
                        writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock());
                    }
                    outputMatrices[i].setHDFSFileExists(true);
                    if (inst.getJobType() != JobType.CSV_WRITE) {
                        // write out metadata file
                        // Currently, valueType information is not stored in MR instruction,
                        // since only DOUBLE matrices are supported ==> hard coded the value type information for now
                        MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
                    }
                }
            }
            return ret;
        } catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    // should not come here!
    // (reached only if checkReturnStatus() returns false without throwing)
    throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}
Also used : Path(org.apache.hadoop.fs.Path) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) IOException(java.io.IOException) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixWriter(org.apache.sysml.runtime.io.MatrixWriter)

Aggregations

JobReturn (org.apache.sysml.runtime.matrix.JobReturn)14 Path (org.apache.hadoop.fs.Path)6 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)5 JobConf (org.apache.hadoop.mapred.JobConf)4 FileSystem (org.apache.hadoop.fs.FileSystem)3 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)3 ArrayList (java.util.ArrayList)2 LinkedList (java.util.LinkedList)2 RunningJob (org.apache.hadoop.mapred.RunningJob)2 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)2 CSVReblockInstruction (org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction)2 BufferedReader (java.io.BufferedReader)1 BufferedWriter (java.io.BufferedWriter)1 IOException (java.io.IOException)1 InputStreamReader (java.io.InputStreamReader)1 OutputStreamWriter (java.io.OutputStreamWriter)1 Group (org.apache.hadoop.mapred.Counters.Group)1 DMLConfig (org.apache.sysml.conf.DMLConfig)1 DataGenMRInstruction (org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction)1