Examples with OutputInfo - org.apache.sysml.runtime.matrix.data.OutputInfo

Example 16 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class TransformEncodeDecodeTest method runTransformEncodeDecodeTest.

private void runTransformEncodeDecodeTest(ExecType et, boolean sparse, String fmt) {
    RUNTIME_PLATFORM platformOld = rtplatform;
    // only CP supported
    rtplatform = RUNTIME_PLATFORM.HYBRID;
    try {
        getAndLoadTestConfiguration(TEST_NAME1);
        // get input/output info
        InputInfo iinfo = InputInfo.stringExternalToInputInfo(fmt);
        OutputInfo oinfo = InputInfo.getMatchingOutputInfo(iinfo);
        // generate and write input data
        double[][] A = TestUtils.round(getRandomMatrix(rows, cols, 1, 15, sparse ? sparsity2 : sparsity1, 7));
        FrameBlock FA = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(A));
        FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
        writer.writeFrameToHDFS(FA, input("F"), rows, cols);
        fullDMLScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", input("F"), fmt, String.valueOf(rows), String.valueOf(cols), SCRIPT_DIR + TEST_DIR + SPEC, output("FO") };
        // run test
        runTest(true, false, null, -1);
        // compare matrices (values recoded to identical codes)
        FrameReader reader = FrameReaderFactory.createFrameReader(iinfo);
        FrameBlock FO = reader.readFrameFromHDFS(output("FO"), 16, 2);
        HashMap<String, Long> cFA = getCounts(FA, 1);
        Iterator<String[]> iterFO = FO.getStringRowIterator();
        while (iterFO.hasNext()) {
            String[] row = iterFO.next();
            Double expected = (double) cFA.get(row[1]);
            Double val = (row[0] != null) ? Double.valueOf(row[0]) : 0;
            Assert.assertEquals("Output aggregates don't match: " + expected + " vs " + val, expected, val);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    } finally {
        rtplatform = platformOld;
    }
}

Also used : FrameWriter(org.apache.sysml.runtime.io.FrameWriter) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 17 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class Dag method generateMapReduceInstructions.

/**
 * Method to generate MapReduce job instructions from a given set of nodes.
 *
 * @param execNodes list of exec nodes
 * @param inst list of instructions
 * @param writeinst list of write instructions
 * @param deleteinst list of delete instructions
 * @param rmvarinst list of rmvar instructions
 * @param jt job type
 */
private void generateMapReduceInstructions(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeinst, ArrayList<Instruction> deleteinst, ArrayList<Instruction> rmvarinst, JobType jt) {
    ArrayList<Byte> resultIndices = new ArrayList<>();
    ArrayList<String> inputs = new ArrayList<>();
    ArrayList<String> outputs = new ArrayList<>();
    ArrayList<InputInfo> inputInfos = new ArrayList<>();
    ArrayList<OutputInfo> outputInfos = new ArrayList<>();
    ArrayList<Long> numRows = new ArrayList<>();
    ArrayList<Long> numCols = new ArrayList<>();
    ArrayList<Long> numRowsPerBlock = new ArrayList<>();
    ArrayList<Long> numColsPerBlock = new ArrayList<>();
    ArrayList<String> mapperInstructions = new ArrayList<>();
    ArrayList<String> randInstructions = new ArrayList<>();
    ArrayList<String> recordReaderInstructions = new ArrayList<>();
    int numReducers = 0;
    int replication = 1;
    ArrayList<String> inputLabels = new ArrayList<>();
    ArrayList<String> outputLabels = new ArrayList<>();
    ArrayList<Instruction> renameInstructions = new ArrayList<>();
    ArrayList<Instruction> variableInstructions = new ArrayList<>();
    ArrayList<Instruction> postInstructions = new ArrayList<>();
    ArrayList<Integer> MRJobLineNumbers = null;
    if (DMLScript.ENABLE_DEBUG_MODE) {
        MRJobLineNumbers = new ArrayList<>();
    }
    ArrayList<Lop> inputLops = new ArrayList<>();
    boolean cellModeOverride = false;
    /* Find the nodes that produce an output */
    ArrayList<Lop> rootNodes = new ArrayList<>();
    getOutputNodes(execNodes, rootNodes, jt);
    if (LOG.isTraceEnabled())
        LOG.trace("# of root nodes = " + rootNodes.size());
    /* Remove transient writes that are simple copy of transient reads */
    if (jt == JobType.GMR || jt == JobType.GMRCELL) {
        ArrayList<Lop> markedNodes = new ArrayList<>();
        // only keep data nodes that are results of some computation.
        for (Lop rnode : rootNodes) {
            if (rnode.getExecLocation() == ExecLocation.Data && ((Data) rnode).isTransient() && ((Data) rnode).getOperationType() == OperationTypes.WRITE && ((Data) rnode).getDataType() == DataType.MATRIX) {
                // no computation, just a copy
                if (rnode.getInputs().get(0).getExecLocation() == ExecLocation.Data && ((Data) rnode.getInputs().get(0)).isTransient() && rnode.getOutputParameters().getLabel().equals(rnode.getInputs().get(0).getOutputParameters().getLabel())) {
                    markedNodes.add(rnode);
                }
            }
        }
        // delete marked nodes
        rootNodes.removeAll(markedNodes);
        markedNodes.clear();
        if (rootNodes.isEmpty())
            return;
    }
    // structure that maps node to their indices that will be used in the instructions
    HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
    for (Lop rnode : rootNodes) {
        getInputPathsAndParameters(rnode, execNodes, inputs, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
    }
    // In case of RAND job, instructions are defined in the input file
    if (jt == JobType.DATAGEN)
        randInstructions = inputs;
    int[] start_index = new int[1];
    start_index[0] = inputs.size();
    // currently, recordreader instructions are allowed only in GMR jobs
    if (jt == JobType.GMR || jt == JobType.GMRCELL) {
        for (Lop rnode : rootNodes) {
            getRecordReaderInstructions(rnode, execNodes, inputs, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
            if (recordReaderInstructions.size() > 1)
                throw new LopsException("MapReduce job can only have a single recordreader instruction: " + recordReaderInstructions.toString());
        }
    }
    // 
    if (jt != JobType.REBLOCK && jt != JobType.CSV_REBLOCK && jt != JobType.DATAGEN) {
        for (int i = 0; i < inputInfos.size(); i++) if (inputInfos.get(i) == InputInfo.BinaryCellInputInfo || inputInfos.get(i) == InputInfo.TextCellInputInfo)
            cellModeOverride = true;
    }
    if (!recordReaderInstructions.isEmpty() || jt == JobType.GROUPED_AGG)
        cellModeOverride = true;
    for (int i = 0; i < rootNodes.size(); i++) {
        getMapperInstructions(rootNodes.get(i), execNodes, inputs, mapperInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("    Input strings: " + inputs.toString());
        if (jt == JobType.DATAGEN)
            LOG.trace("    Rand instructions: " + getCSVString(randInstructions));
        if (jt == JobType.GMR)
            LOG.trace("    RecordReader instructions: " + getCSVString(recordReaderInstructions));
        LOG.trace("    Mapper instructions: " + getCSVString(mapperInstructions));
    }
    /* Get Shuffle and Reducer Instructions */
    ArrayList<String> shuffleInstructions = new ArrayList<>();
    ArrayList<String> aggInstructionsReducer = new ArrayList<>();
    ArrayList<String> otherInstructionsReducer = new ArrayList<>();
    for (Lop rn : rootNodes) {
        int resultIndex = getAggAndOtherInstructions(rn, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
        if (resultIndex == -1)
            throw new LopsException("Unexpected error in piggybacking!");
        if (rn.getExecLocation() == ExecLocation.Data && ((Data) rn).getOperationType() == Data.OperationTypes.WRITE && ((Data) rn).isTransient() && rootNodes.contains(rn.getInputs().get(0))) {
            // Both rn (a transient write) and its input are root nodes.
            // Instead of creating two copies of the data, simply generate a cpvar instruction
            NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, true);
            writeinst.addAll(out.getLastInstructions());
        } else {
            resultIndices.add(Byte.valueOf((byte) resultIndex));
            // setup output filenames and outputInfos and generate related instructions
            NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, false);
            outputLabels.add(out.getVarName());
            outputs.add(out.getFileName());
            outputInfos.add(out.getOutInfo());
            if (LOG.isTraceEnabled()) {
                LOG.trace("    Output Info: " + out.getFileName() + ";" + OutputInfo.outputInfoToString(out.getOutInfo()) + ";" + out.getVarName());
            }
            renameInstructions.addAll(out.getLastInstructions());
            variableInstructions.addAll(out.getPreInstructions());
            postInstructions.addAll(out.getPostInstructions());
        }
    }
    /* Determine if the output dimensions are known */
    byte[] resultIndicesByte = new byte[resultIndices.size()];
    for (int i = 0; i < resultIndicesByte.length; i++) {
        resultIndicesByte[i] = resultIndices.get(i).byteValue();
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("    Shuffle Instructions: " + getCSVString(shuffleInstructions));
        LOG.trace("    Aggregate Instructions: " + getCSVString(aggInstructionsReducer));
        LOG.trace("    Other instructions =" + getCSVString(otherInstructionsReducer));
        LOG.trace("    Output strings: " + outputs.toString());
        LOG.trace("    ResultIndices = " + resultIndices.toString());
    }
    /* Prepare the MapReduce job instruction */
    MRJobInstruction mr = new MRJobInstruction(jt);
    // check if this is a map-only job. If not, set the number of reducers
    if (!shuffleInstructions.isEmpty() || !aggInstructionsReducer.isEmpty() || !otherInstructionsReducer.isEmpty())
        numReducers = total_reducers;
    // set inputs, outputs, and other other properties for the job
    mr.setInputOutputLabels(inputLabels.toArray(new String[0]), outputLabels.toArray(new String[0]));
    mr.setOutputs(resultIndicesByte);
    mr.setDimsUnknownFilePrefix(getFilePath());
    mr.setNumberOfReducers(numReducers);
    mr.setReplication(replication);
    // set instructions for recordReader and mapper
    mr.setRecordReaderInstructions(getCSVString(recordReaderInstructions));
    mr.setMapperInstructions(getCSVString(mapperInstructions));
    // compute and set mapper memory requirements (for consistency of runtime piggybacking)
    if (jt == JobType.GMR) {
        double mem = 0;
        for (Lop n : execNodes) mem += computeFootprintInMapper(n);
        mr.setMemoryRequirements(mem);
    }
    if (jt == JobType.DATAGEN)
        mr.setRandInstructions(getCSVString(randInstructions));
    // set shuffle instructions
    mr.setShuffleInstructions(getCSVString(shuffleInstructions));
    // set reducer instruction
    mr.setAggregateInstructionsInReducer(getCSVString(aggInstructionsReducer));
    mr.setOtherInstructionsInReducer(getCSVString(otherInstructionsReducer));
    if (DMLScript.ENABLE_DEBUG_MODE) {
        // set line number information for each MR instruction
        mr.setMRJobInstructionsLineNumbers(MRJobLineNumbers);
    }
    /* Add the prepared instructions to output set */
    inst.addAll(variableInstructions);
    inst.add(mr);
    inst.addAll(postInstructions);
    deleteinst.addAll(renameInstructions);
    for (Lop l : inputLops) {
        if (DMLScript.ENABLE_DEBUG_MODE) {
            processConsumers(l, rmvarinst, deleteinst, l);
        } else {
            processConsumers(l, rmvarinst, deleteinst, null);
        }
    }
}

Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) LopsException(org.apache.sysml.lops.LopsException)

Example 18 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class Dag method getOutputInfo.

/**
 * Method that determines the output format for a given node.
 *
 * @param node low-level operator
 * @param cellModeOverride override mode
 * @return output info
 */
private static OutputInfo getOutputInfo(Lop node, boolean cellModeOverride) {
    if ((node.getDataType() == DataType.SCALAR && node.getExecType() == ExecType.CP) || node instanceof FunctionCallCP)
        return null;
    OutputInfo oinfo = null;
    OutputParameters oparams = node.getOutputParameters();
    if (oparams.isBlocked()) {
        if (!cellModeOverride)
            oinfo = OutputInfo.BinaryBlockOutputInfo;
        else {
            // output format is overridden, for example, due to recordReaderInstructions in the job
            oinfo = OutputInfo.BinaryCellOutputInfo;
            // which stores the outputInfo.
            try {
                oparams.setDimensions(oparams.getNumRows(), oparams.getNumCols(), -1, -1, oparams.getNnz(), oparams.getUpdateType());
            } catch (HopsException e) {
                throw new LopsException(node.printErrorLocation() + "error in getOutputInfo in Dag ", e);
            }
        }
    } else {
        if (oparams.getFormat() == Format.TEXT || oparams.getFormat() == Format.MM)
            oinfo = OutputInfo.TextCellOutputInfo;
        else if (oparams.getFormat() == Format.CSV) {
            oinfo = OutputInfo.CSVOutputInfo;
        } else {
            oinfo = OutputInfo.BinaryCellOutputInfo;
        }
    }
    /* Instead of following hardcoding, one must get this information from Lops */
    if (node.getType() == Type.SortKeys && node.getExecType() == ExecType.MR) {
        if (((SortKeys) node).getOpType() == SortKeys.OperationTypes.Indexes)
            oinfo = OutputInfo.BinaryBlockOutputInfo;
        else
            oinfo = OutputInfo.OutputInfoForSortOutput;
    } else if (node.getType() == Type.CombineBinary) {
        // Output format of CombineBinary (CB) depends on how the output is consumed
        CombineBinary combine = (CombineBinary) node;
        if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
            oinfo = OutputInfo.OutputInfoForSortInput;
        } else if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
            oinfo = OutputInfo.WeightedPairOutputInfo;
        }
    } else if (node.getType() == Type.CombineTernary) {
        oinfo = OutputInfo.WeightedPairOutputInfo;
    } else if (node.getType() == Type.CentralMoment || node.getType() == Type.CoVariance) {
        // CMMR always operate in "cell mode",
        // and the output is always in cell format
        oinfo = OutputInfo.BinaryCellOutputInfo;
    }
    return oinfo;
}

Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) CombineBinary(org.apache.sysml.lops.CombineBinary) OutputParameters(org.apache.sysml.lops.OutputParameters) LopsException(org.apache.sysml.lops.LopsException) FunctionCallCP(org.apache.sysml.lops.FunctionCallCP) HopsException(org.apache.sysml.hops.HopsException)

Example 19 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class RunMRJobs method submitJob.

/**
 * Submits an MR job instruction, without modifying any state of that instruction.
 *
 * @param inst instruction
 * @return job status
 */
public static JobReturn submitJob(MRJobInstruction inst) {
    JobReturn ret = new JobReturn();
    MatrixObject[] inputMatrices = inst.getInputMatrices();
    MatrixObject[] outputMatrices = inst.getOutputMatrices();
    boolean execCP = false;
    // Spawn MapReduce Jobs
    try {
        // replace all placeholders in all instructions with appropriate values
        String rdInst = inst.getIv_randInstructions();
        String rrInst = inst.getIv_recordReaderInstructions();
        String mapInst = inst.getIv_instructionsInMapper();
        String shuffleInst = inst.getIv_shuffleInstructions();
        String aggInst = inst.getIv_aggInstructions();
        String otherInst = inst.getIv_otherInstructions();
        boolean jvmReuse = ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.JVM_REUSE);
        switch(inst.getJobType()) {
            case GMR:
            case GMRCELL:
                ret = GMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), inst.getPartitioned(), inst.getPformats(), inst.getPsizes(), rrInst, mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATAGEN:
                if (ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.ALLOW_RAND_JOB_RECOMPILE && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPDataGen(inst, rdInst)) {
                    ret = executeInMemoryDataGenOperations(inst, rdInst, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    ret = DataGenMR.runJob(inst, rdInst.split(Lop.INSTRUCTION_DELIMITOR), mapInst, aggInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                }
                break;
            case CM_COV:
                ret = CMCOVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case GROUPED_AGG:
                ret = GroupedAggMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getDimsUnknownFilePrefix(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case REBLOCK:
            case CSV_REBLOCK:
                if (ConfigurationManager.isDynamicRecompilation() && DMLScript.rtplatform != RUNTIME_PLATFORM.HADOOP && Recompiler.checkCPReblock(inst, inputMatrices)) {
                    ret = executeInMemoryReblockOperations(inst, shuffleInst, inputMatrices, outputMatrices);
                    Statistics.decrementNoOfExecutedMRJobs();
                    execCP = true;
                } else {
                    // export dirty matrices to HDFS (initially deferred)
                    for (MatrixObject m : inputMatrices) {
                        if (m.isDirty())
                            m.exportData();
                    }
                    checkEmptyInputs(inst, inputMatrices);
                    if (inst.getJobType() == JobType.REBLOCK) {
                        ret = ReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), getNNZ(inputMatrices), mapInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), jvmReuse, inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    } else if (inst.getJobType() == JobType.CSV_REBLOCK) {
                        ret = CSVReblockMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                    }
                }
                break;
            case CSV_WRITE:
                ret = WriteCSVMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBclens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs());
                break;
            case MMCJ:
                ret = MMCJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0]);
                break;
            case MMRJ:
                ret = MMRJMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), mapInst, aggInst, shuffleInst, otherInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case SORT:
                boolean weightsflag = true;
                if (!mapInst.equalsIgnoreCase(""))
                    weightsflag = false;
                ret = SortMR.runJob(inst, inst.getInputs()[0], inst.getInputInfos()[0], inst.getRlens()[0], inst.getClens()[0], inst.getBrlens()[0], inst.getBclens()[0], mapInst, shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getOutputs()[0], inst.getOutputInfos()[0], weightsflag);
                break;
            case COMBINE:
                ret = CombineMR.runJob(inst, inst.getInputs(), inst.getInputInfos(), inst.getRlens(), inst.getClens(), inst.getBrlens(), inst.getBclens(), shuffleInst, inst.getIv_numReducers(), inst.getIv_replication(), inst.getIv_resultIndices(), inst.getOutputs(), inst.getOutputInfos());
                break;
            case DATA_PARTITION:
                ret = DataPartitionMR.runJob(inst, inputMatrices, shuffleInst, inst.getIv_resultIndices(), outputMatrices, inst.getIv_numReducers(), inst.getIv_replication());
                break;
            default:
                throw new DMLRuntimeException("Invalid jobtype: " + inst.getJobType());
        }
    }// end of try block
     catch (Exception e) {
        throw new DMLRuntimeException(e);
    }
    if (ret.checkReturnStatus()) {
        /*
			 * Check if any output is empty. If yes, create a dummy file. Needs
			 * to be done only in case of (1) CellOutputInfo and if not CP, or 
			 * (2) BinaryBlockOutputInfo if not CP and output empty blocks disabled.
			 */
        try {
            if (!execCP) {
                for (int i = 0; i < outputMatrices.length; i++) {
                    // get output meta data
                    MetaDataFormat meta = (MetaDataFormat) outputMatrices[i].getMetaData();
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    OutputInfo outinfo = meta.getOutputInfo();
                    String fname = outputMatrices[i].getFileName();
                    if (MapReduceTool.isHDFSFileEmpty(fname)) {
                        // prepare output file
                        Path filepath = new Path(fname, "0-m-00000");
                        MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(outinfo);
                        writer.writeEmptyMatrixToHDFS(filepath.toString(), mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock());
                    }
                    outputMatrices[i].setHDFSFileExists(true);
                    if (inst.getJobType() != JobType.CSV_WRITE) {
                        // write out metadata file
                        // Currently, valueType information in not stored in MR instruction,
                        // since only DOUBLE matrices are supported ==> hard coded the value type information for now
                        MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, ret.getMetaData(i).getMatrixCharacteristics(), outinfo);
                    }
                }
            }
            return ret;
        } catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    // should not come here!
    throw new DMLRuntimeException("Unexpected Job Type: " + inst.getJobType());
}

Also used : Path(org.apache.hadoop.fs.Path) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) IOException(java.io.IOException) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixWriter(org.apache.sysml.runtime.io.MatrixWriter)

Example 20 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class JMLCInputStreamReadTest method runJMLCInputStreamReadTest.

private void runJMLCInputStreamReadTest(DataType dt, boolean sparse, String format, boolean metaData) throws IOException {
    TestConfiguration config = getTestConfiguration(TEST_NAME);
    loadTestConfiguration(config);
    // generate inputs
    OutputInfo oinfo = format.equals("csv") ? OutputInfo.CSVOutputInfo : OutputInfo.TextCellOutputInfo;
    double[][] data = TestUtils.round(getRandomMatrix(rows, cols, 0.51, 7.49, sparse ? sparsity2 : sparsity1, 7));
    Connection conn = new Connection();
    try {
        if (dt == DataType.MATRIX) {
            // write input matrix
            MatrixBlock mb = DataConverter.convertToMatrixBlock(data);
            MatrixWriter writer = MatrixWriterFactory.createMatrixWriter(oinfo);
            writer.writeMatrixToHDFS(mb, output("X"), rows, cols, -1, -1, -1);
            // read matrix from input stream
            FileInputStream fis = new FileInputStream(output("X"));
            double[][] data2 = conn.convertToDoubleMatrix(fis, rows, cols, format);
            fis.close();
            // compare matrix result
            TestUtils.compareMatrices(data, data2, rows, cols, 0);
        } else if (dt == DataType.FRAME) {
            // write input frame
            String[][] fdata = FrameTransformTest.createFrameData(data, "V");
            // test quoted tokens w/ inner quotes
            fdata[3][1] = "\"ab\"\"cdef\"";
            if (format.equals("csv"))
                // test delimiter and space tokens
                fdata[7][2] = "\"a,bc def\"";
            FrameBlock fb = DataConverter.convertToFrameBlock(fdata);
            if (metaData) {
                fb.setColumnNames(IntStream.range(0, cols).mapToObj(i -> "CC" + i).collect(Collectors.toList()).toArray(new String[0]));
            }
            FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
            writer.writeFrameToHDFS(fb, output("X"), rows, cols);
            // read frame from input stream
            FileInputStream fis = new FileInputStream(output("X"));
            String[][] fdata2 = conn.convertToStringFrame(fis, rows, cols, format);
            fis.close();
            // compare frame result
            TestUtils.compareFrames(fdata, fdata2, rows, cols);
        } else {
            throw new IOException("Unsupported data type: " + dt.name());
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        MapReduceTool.deleteFileIfExistOnHDFS(output("X"));
        IOUtilFunctions.closeSilently(conn);
    }
}

Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) Connection(org.apache.sysml.api.jmlc.Connection) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) FrameWriter(org.apache.sysml.runtime.io.FrameWriter) IOException(java.io.IOException) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) MatrixWriter(org.apache.sysml.runtime.io.MatrixWriter)

Aggregations

OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)35 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)17 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)15 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)14 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)13 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)11 IOException (java.io.IOException)8 ValueType (org.apache.sysml.parser.Expression.ValueType)5 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)5 HashMap (java.util.HashMap)3 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)3 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)3 Matrix (org.apache.sysml.udf.Matrix)3 Scalar (org.apache.sysml.udf.Scalar)3 ArrayList (java.util.ArrayList)2 Path (org.apache.hadoop.fs.Path)2 JobConf (org.apache.hadoop.mapred.JobConf)2 RunningJob (org.apache.hadoop.mapred.RunningJob)2 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)2 LopsException (org.apache.sysml.lops.LopsException)2