Search in sources :

Example 36 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class ResultMergeLocalMemory method executeParallelMerge.

/**
 * Merges all non-empty worker inputs into a freshly allocated dense result
 * block using up to {@code par} concurrent merge threads, and wraps the
 * result into a new matrix object.
 *
 * Inputs are processed in waves of at most {@code numThreads} workers; the
 * last wave may be smaller when the number of inputs is not a multiple of
 * the wave size. If there is nothing to merge, the original output object
 * is returned as is.
 *
 * @param par requested degree of parallelism; values below 1 fall back to a
 *            single merge thread per wave
 * @return new matrix object holding the merged result, or the original
 *         output if no input needed merging
 * @throws DMLRuntimeException if acquiring, merging or creating the result fails
 */
@Override
public MatrixObject executeParallelMerge(int par) {
    // always create new matrix object (required for nested parallelism)
    MatrixObject moNew = null;
    if (LOG.isTraceEnabled()) {
        LOG.trace("ResultMerge (local, in-memory): Execute parallel (par=" + par + ") " + "merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    }
    try {
        // get matrix blocks through caching
        MatrixBlock outMB = _output.acquireRead();
        ArrayList<MatrixObject> inMO = new ArrayList<>();
        for (MatrixObject in : _inputs) {
            // check for empty inputs (no iterations executed)
            if (in != null && in != _output) {
                inMO.add(in);
            }
        }
        // if there exist something to merge
        if (!inMO.isEmpty()) {
            // get old output matrix from cache for compare
            // NOTE: always in dense representation in order to allow for parallel unsynchronized access
            long rows = outMB.getNumRows();
            long cols = outMB.getNumColumns();
            MatrixBlock outMBNew = new MatrixBlock((int) rows, (int) cols, false);
            outMBNew.allocateDenseBlockUnsafe((int) rows, (int) cols);
            // create compare matrix if required (existing data in result)
            _compare = getCompareMatrix(outMB);
            if (_compare != null) {
                outMBNew.copy(outMB);
            }
            // parallel merge of all inputs
            // number of inputs can be lower than par
            int numThreads = Math.min(par, inMO.size());
            // ensure robustness for remote exec
            numThreads = Math.min(numThreads, InfrastructureAnalyzer.getLocalParallelism());
            // a non-positive thread count would never advance the wave loop below
            numThreads = Math.max(numThreads, 1);
            // multiple waves if necessary
            for (int k = 0; k < inMO.size(); k += numThreads) {
                // the last wave only covers the remaining inputs, so that
                // inMO.get(k + i) never runs past the end of the list
                int waveSize = Math.min(numThreads, inMO.size() - k);
                Thread[] threads = new Thread[waveSize];
                // create and start threads; each worker is handed one input
                // and the shared result block
                for (int i = 0; i < waveSize; i++) {
                    ResultMergeWorker rmw = new ResultMergeWorker(inMO.get(k + i), outMBNew);
                    threads[i] = new Thread(rmw);
                    threads[i].setPriority(Thread.MAX_PRIORITY);
                    // start execution
                    threads[i].start();
                }
                // wait for all workers of this wave to finish
                for (int i = 0; i < waveSize; i++) {
                    threads[i].join();
                }
            }
            // create new output matrix
            // (e.g., to prevent potential export<->read file access conflict in specific cases of
            // local-remote nested parfor))
            moNew = createNewMatrixObject(outMBNew);
        } else {
            // return old matrix, to prevent copy
            moNew = _output;
        }
        // release old output, and all inputs
        _output.release();
    } catch (InterruptedException ex) {
        // keep the interrupt visible to callers further up the stack
        Thread.currentThread().interrupt();
        throw new DMLRuntimeException(ex);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    return moNew;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 37 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class ResultMergeRemoteSpark method executeParallelMerge.

/**
 * Merges the worker results via {@code executeMerge} and binds the returned
 * RDD handle to a newly created matrix object that carries a copy of the
 * old output's matrix characteristics and formats.
 *
 * If there are no inputs, the existing output object is returned unchanged.
 *
 * @param par degree of parallelism (only reported via the interface; not read here)
 * @return new matrix object backed by the merged RDD, or the original output
 *         if there is nothing to merge
 */
@Override
public MatrixObject executeParallelMerge(int par) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("ResultMerge (remote, spark): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    }

    // nothing to merge: hand back the old matrix to prevent a copy
    if (_inputs == null || _inputs.length == 0) {
        return _output;
    }

    try {
        // prepare compare: only needed if the old output already holds non-zeros
        MetaDataFormat oldMeta = (MetaDataFormat) _output.getMetaData();
        MatrixCharacteristics oldDims = oldMeta.getMatrixCharacteristics();
        MatrixObject compareTarget = (oldDims.getNonZeros() != 0) ? _output : null;

        // actual merge
        RDDObject mergedRdd = executeMerge(compareTarget, _inputs,
            oldDims.getRows(), oldDims.getCols(),
            oldDims.getRowsPerBlock(), oldDims.getColsPerBlock());

        // always create a new matrix object (required for nested parallelism,
        // e.g., to prevent potential export<->read file access conflicts)
        MatrixObject merged = new MatrixObject(_output.getValueType(), _outputFName);
        OutputInfo oldOutputInfo = oldMeta.getOutputInfo();
        InputInfo oldInputInfo = oldMeta.getInputInfo();
        MatrixCharacteristics newDims = new MatrixCharacteristics(oldDims);
        newDims.setNonZeros(_isAccum ? -1 : computeNonZeros(_output, Arrays.asList(_inputs)));
        merged.setMetaData(new MetaDataFormat(newDims, oldOutputInfo, oldInputInfo));
        merged.setRDDHandle(mergedRdd);
        return merged;
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 38 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class VariableCPInstruction method processWriteInstruction.

/**
 * Handler for write instructions.
 *
 * Scalars are written through {@code writeScalarToHDFS}. Matrices in the
 * non-native formats "matrixmarket" and "csv" are delegated to dedicated
 * helpers; every other matrix format, and all frames, are exported directly
 * in the format named by the third instruction operand. Any other data type
 * is ignored.
 *
 * @param ec execution context
 */
private void processWriteInstruction(ExecutionContext ec) {
    // resolve file name and description (each either a literal or a variable expression)
    final String targetFile = ec.getScalarInput(getInput2().getName(), ValueType.STRING, getInput2().isLiteral()).getStringValue();
    final String description = ec.getScalarInput(getInput4().getName(), ValueType.STRING, getInput4().isLiteral()).getStringValue();
    _formatProperties.setDescription(description);

    final DataType kind = getInput1().getDataType();

    if (kind == DataType.SCALAR) {
        writeScalarToHDFS(ec, targetFile);
        return;
    }

    if (kind == DataType.FRAME) {
        final String frameFormat = getInput3().getName();
        FrameObject frame = ec.getFrameObject(getInput1().getName());
        frame.exportData(targetFile, frameFormat, _formatProperties);
        return;
    }

    if (kind != DataType.MATRIX) {
        // neither scalar, matrix nor frame: nothing to write
        return;
    }

    final String matrixFormat = getInput3().getName();
    if (matrixFormat.equalsIgnoreCase("matrixmarket")) {
        writeMMFile(ec, targetFile);
    } else if (matrixFormat.equalsIgnoreCase("csv")) {
        writeCSVFile(ec, targetFile);
    } else {
        // default behavior: export in the requested format as is
        MatrixObject matrix = ec.getMatrixObject(getInput1().getName());
        matrix.exportData(targetFile, matrixFormat, _formatProperties);
    }
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject)

Example 39 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class VariableCPInstruction method processMoveInstruction.

/**
 * Handler for mvvar instructions.
 *
 * Two forms are supported:
 * <ul>
 * <li>{@code mvvar <srcvar> <destvar>} (no third operand): rebinds the data
 * object of the source variable to the target variable name. If the target
 * is a matrix or frame, a previously bound cacheable object is removed and
 * cleaned up first.</li>
 * <li>{@code mvvar <srcvar> <destFile> <format>}: moves the file pointed to
 * by srcvar to destFile. Currently, applicable only when format=binaryblock.</li>
 * </ul>
 *
 * @param ec execution context
 * @throws DMLRuntimeException if the source variable does not exist, is not
 *         a cacheable data object when a file move is requested, the move
 *         fails, or the requested format is not supported for the object
 */
private void processMoveInstruction(ExecutionContext ec) {
    final String srcName = getInput1().getName();
    if (getInput3() == null) {
        // example: mvvar tempA A
        // get source variable
        Data srcData = ec.getVariable(srcName);
        if (srcData == null) {
            throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + srcName + ", while processing instruction " + this.toString());
        }
        if (getInput2().getDataType().isMatrix() || getInput2().getDataType().isFrame()) {
            // remove existing variable bound to target name
            Data tgt = ec.removeVariable(getInput2().getName());
            // cleanup matrix data on fs/hdfs (if necessary); instanceof is false for null
            if (tgt instanceof CacheableData) {
                ec.cleanupCacheableData((CacheableData<?>) tgt);
            }
        }
        // do the actual move
        ec.setVariable(getInput2().getName(), srcData);
        ec.removeVariable(srcName);
    } else {
        // example instruction: mvvar <srcVar> <destFile> <format>
        Object object = ec.getVariable(srcName);
        if (object == null) {
            throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + srcName + ", while processing instruction " + this.toString());
        }
        final String format = getInput3().getName();
        if (format.equalsIgnoreCase("binaryblock")) {
            // only cacheable data objects can be moved on the file system
            if (!(object instanceof CacheableData)) {
                throw new DMLRuntimeException("Failed to move var " + srcName + " to file " + getInput2().getName() + ": variable is not a cacheable data object.");
            }
            boolean success = ((CacheableData<?>) object).moveData(getInput2().getName(), format);
            if (!success) {
                throw new DMLRuntimeException("Failed to move var " + srcName + " to file " + getInput2().getName() + ".");
            }
        } else if (object instanceof MatrixObject) {
            MatrixObject mo = (MatrixObject) object;
            throw new DMLRuntimeException("Unexpected formats while copying: from matrix blocks [" + mo.getNumRowsPerBlock() + "," + mo.getNumColumnsPerBlock() + "] to " + format);
        } else if (object instanceof FrameObject) {
            FrameObject fo = (FrameObject) object;
            // report rows and columns (the column count used to be printed twice)
            throw new DMLRuntimeException("Unexpected formats while copying: from frame object [" + fo.getNumRows() + "," + fo.getNumColumns() + "] to " + format);
        }
    }
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) CacheableData(org.apache.sysml.runtime.controlprogram.caching.CacheableData) CacheableData(org.apache.sysml.runtime.controlprogram.caching.CacheableData) MetaData(org.apache.sysml.runtime.matrix.MetaData) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 40 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class VariableCPInstruction method writeCSVFile.

/**
 * Helper function to write CSV files to HDFS.
 *
 * A dirty matrix is exported directly. Otherwise the stored output format
 * decides: data already in CSV gets its header added via the CSV writer,
 * binary-block and text-cell data is exported as CSV, and any other format
 * is rejected. In the non-dirty case the ".mtd" metadata file is written
 * afterwards.
 *
 * @param ec execution context
 * @param fname file name
 */
private void writeCSVFile(ExecutionContext ec, String fname) {
    final String csvFormat = "csv";
    MatrixObject matrix = ec.getMatrixObject(getInput1().getName());

    if (matrix.isDirty()) {
        // there exist data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        matrix.exportData(fname, csvFormat, _formatProperties);
        return;
    }

    try {
        MetaDataFormat meta = (MetaDataFormat) matrix.getMetaData();
        OutputInfo storedFormat = meta.getOutputInfo();
        MatrixCharacteristics dims = meta.getMatrixCharacteristics();

        boolean alreadyCsv = (storedFormat == OutputInfo.CSVOutputInfo);
        boolean convertible = (storedFormat == OutputInfo.BinaryBlockOutputInfo)
            || (storedFormat == OutputInfo.TextCellOutputInfo);

        if (!alreadyCsv && !convertible) {
            throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(storedFormat) + "): can not export into CSV format.");
        }

        if (alreadyCsv) {
            WriterTextCSV csvWriter = new WriterTextCSV((CSVFileFormatProperties) _formatProperties);
            csvWriter.addHeaderToCSV(matrix.getFileName(), fname, dims.getRows(), dims.getCols());
        } else {
            matrix.exportData(fname, csvFormat, _formatProperties);
        }

        // Write Metadata file
        MapReduceTool.writeMetaDataFile(fname + ".mtd", matrix.getValueType(), dims, OutputInfo.CSVOutputInfo, _formatProperties);
    } catch (IOException e) {
        throw new DMLRuntimeException(e);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) WriterTextCSV(org.apache.sysml.runtime.io.WriterTextCSV) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)201 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)74 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)45 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)39 Data (org.apache.sysml.runtime.instructions.cp.Data)37 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)26 Pointer (jcuda.Pointer)20 CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer)20 IOException (java.io.IOException)17 ArrayList (java.util.ArrayList)16 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)14 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)13 CacheableData (org.apache.sysml.runtime.controlprogram.caching.CacheableData)12 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)12 Hop (org.apache.sysml.hops.Hop)11 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)11 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)10 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)10 Path (org.apache.hadoop.fs.Path)9 LongWritable (org.apache.hadoop.io.LongWritable)9