Search in sources :

Example 46 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class WriteSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get filename (literal or variable expression)
    String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue();
    String desc = ec.getScalarInput(input4.getName(), ValueType.STRING, input4.isLiteral()).getStringValue();
    formatProperties.setDescription(desc);
    ValueType[] schema = (input1.getDataType() == DataType.FRAME) ? sec.getFrameObject(input1.getName()).getSchema() : null;
    try {
        // if the file already exists on HDFS, remove it.
        MapReduceTool.deleteFileIfExistOnHDFS(fname);
        // prepare output info according to meta data
        String outFmt = input3.getName();
        OutputInfo oi = OutputInfo.stringToOutputInfo(outFmt);
        // core matrix/frame write
        if (input1.getDataType() == DataType.MATRIX)
            processMatrixWriteInstruction(sec, fname, oi);
        else
            processFrameWriteInstruction(sec, fname, oi, schema);
    } catch (IOException ex) {
        throw new DMLRuntimeException("Failed to process write instruction", ex);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) ValueType(org.apache.sysml.parser.Expression.ValueType) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 47 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class WriteCSVMR method runJob.

public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String csvWriteInstructions, int numReducers, int replication, byte[] resultIndexes, String[] outputs) throws Exception {
    JobConf job = new JobConf(WriteCSVMR.class);
    job.setJobName("WriteCSV-MR");
    // check for valid output dimensions
    for (int i = 0; i < rlens.length; i++) if (rlens[i] == 0 || clens[i] == 0)
        throw new IOException("Write of matrices with zero" + " rows or columns not supported (" + rlens[i] + "x" + clens[i] + ").");
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
    // set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, ConvertTarget.CSVWRITE);
    // set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
    // set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
    MRJobConfiguration.setCSVWriteInstructions(job, csvWriteInstructions);
    // set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    // set up custom map/reduce configurations
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    long maxRlen = 0;
    for (long rlen : rlens) if (rlen > maxRlen)
        maxRlen = rlen;
    // set up the number of reducers (according to output size)
    int numRed = determineNumReducers(rlens, clens, config.getIntValue(DMLConfig.NUM_REDUCERS), (int) maxRlen);
    job.setNumReduceTasks(numRed);
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    OutputInfo[] outputInfos = new OutputInfo[outputs.length];
    HashMap<Byte, Integer> indexmap = new HashMap<>();
    for (int i = 0; i < stats.length; i++) {
        indexmap.put(resultIndexes[i], i);
        resultDimsUnknown[i] = (byte) 0;
        stats[i] = new MatrixCharacteristics();
        outputInfos[i] = OutputInfo.CSVOutputInfo;
    }
    CSVWriteInstruction[] ins = MRInstructionParser.parseCSVWriteInstructions(csvWriteInstructions);
    for (CSVWriteInstruction in : ins) stats[indexmap.get(in.output)].set(rlens[in.input], clens[in.input], -1, -1);
    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(stats);
    // set up what matrices are needed to pass from the mapper to reducer
    MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, "", "", csvWriteInstructions, resultIndexes);
    // set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true, true);
    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVWriteMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(MatrixBlock.class);
    // configure reducer
    job.setReducerClass(CSVWriteReducer.class);
    job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
    job.setPartitionerClass(TaggedFirstSecondIndexes.FirstIndexRangePartitioner.class);
    // job.setOutputFormat(UnPaddedOutputFormat.class);
    MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
    }
    // set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    RunningJob runjob = JobClient.runJob(job);
    /* Process different counters */
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
Also used : Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) HashMap(java.util.HashMap) IOException(java.io.IOException) TaggedFirstSecondIndexes(org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf) CSVWriteInstruction(org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction)

Example 48 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class ResultMergeLocalMemory method createNewMatrixObject.

private MatrixObject createNewMatrixObject(MatrixBlock data) {
    ValueType vt = _output.getValueType();
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(vt, _outputFName);
    // create deep copy of metadata obj
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
    mc.setNonZeros(data.getNonZeros());
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    // adjust dense/sparse representation
    data.examSparsity();
    // release new output
    moNew.acquireModify(data);
    moNew.release();
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ValueType(org.apache.sysml.parser.Expression.ValueType) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 49 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class ResultMergeRemoteMR method executeParallelMerge.

@Override
public MatrixObject executeParallelMerge(int par) {
    // always create new matrix object (required for nested parallelism)
    MatrixObject moNew = null;
    if (LOG.isTraceEnabled())
        LOG.trace("ResultMerge (remote, mr): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    try {
        // collect all relevant inputs
        Collection<String> srcFnames = new LinkedList<>();
        ArrayList<MatrixObject> inMO = new ArrayList<>();
        for (MatrixObject in : _inputs) {
            // check for empty inputs (no iterations executed)
            if (in != null && in != _output) {
                // ensure that input file resides on disk
                in.exportData();
                // add to merge list
                srcFnames.add(in.getFileName());
                inMO.add(in);
            }
        }
        if (!srcFnames.isEmpty()) {
            // ensure that outputfile (for comparison) resides on disk
            _output.exportData();
            // actual merge
            MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
            MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
            String fnameCompare = _output.getFileName();
            if (mcOld.getNonZeros() == 0)
                // no compare required
                fnameCompare = null;
            executeMerge(fnameCompare, _outputFName, srcFnames.toArray(new String[0]), metadata.getInputInfo(), metadata.getOutputInfo(), mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
            // create new output matrix (e.g., to prevent potential export<->read file access conflict
            moNew = new MatrixObject(_output.getValueType(), _outputFName);
            OutputInfo oiOld = metadata.getOutputInfo();
            InputInfo iiOld = metadata.getInputInfo();
            MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
            mc.setNonZeros(_isAccum ? -1 : computeNonZeros(_output, inMO));
            MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
            moNew.setMetaData(meta);
        } else {
            // return old matrix, to prevent copy
            moNew = _output;
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 50 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project systemml by apache.

the class DynamicWriteMatrixCP method execute.

@Override
public void execute() {
    boolean success = false;
    try {
        Matrix mat = (Matrix) this.getFunctionInput(0);
        String fname = ((Scalar) this.getFunctionInput(1)).getValue();
        String format = ((Scalar) this.getFunctionInput(2)).getValue();
        MatrixObject mo = mat.getMatrixObject();
        MatrixCharacteristics mc = mo.getMatrixCharacteristics();
        OutputInfo oi = OutputInfo.stringToOutputInfo(format);
        MatrixBlock mb = mo.acquireRead();
        DataConverter.writeMatrixToHDFS(mb, fname, oi, mc);
        mo.release();
        success = true;
    } catch (Exception e) {
        throw new RuntimeException("Error executing dynamic write of matrix", e);
    }
    _success = new Scalar(ScalarValueType.Boolean, String.valueOf(success));
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Matrix(org.apache.sysml.udf.Matrix) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) Scalar(org.apache.sysml.udf.Scalar) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)69 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)34 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)30 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)28 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)25 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)22 IOException (java.io.IOException)16 ValueType (org.apache.sysml.parser.Expression.ValueType)10 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)10 HashMap (java.util.HashMap)6 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)6 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)5 Matrix (org.apache.sysml.udf.Matrix)5 Scalar (org.apache.sysml.udf.Scalar)5 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 JobConf (org.apache.hadoop.mapred.JobConf)4 RunningJob (org.apache.hadoop.mapred.RunningJob)4 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)4 LopsException (org.apache.sysml.lops.LopsException)4