Search in sources :

Example 41 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class ResultMergeLocalMemory method createNewMatrixObject.

private MatrixObject createNewMatrixObject(MatrixBlock data) {
    ValueType vt = _output.getValueType();
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(vt, _outputFName);
    // create deep copy of metadata obj
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
    mc.setNonZeros(data.getNonZeros());
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    // adjust dense/sparse representation
    data.examSparsity();
    // release new output
    moNew.acquireModify(data);
    moNew.release();
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ValueType(org.apache.sysml.parser.Expression.ValueType) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 42 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class ResultMergeRemoteMR method executeParallelMerge.

@Override
public MatrixObject executeParallelMerge(int par) {
    // always create new matrix object (required for nested parallelism)
    MatrixObject moNew = null;
    if (LOG.isTraceEnabled())
        LOG.trace("ResultMerge (remote, mr): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    try {
        // collect all relevant inputs
        Collection<String> srcFnames = new LinkedList<>();
        ArrayList<MatrixObject> inMO = new ArrayList<>();
        for (MatrixObject in : _inputs) {
            // check for empty inputs (no iterations executed)
            if (in != null && in != _output) {
                // ensure that input file resides on disk
                in.exportData();
                // add to merge list
                srcFnames.add(in.getFileName());
                inMO.add(in);
            }
        }
        if (!srcFnames.isEmpty()) {
            // ensure that outputfile (for comparison) resides on disk
            _output.exportData();
            // actual merge
            MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
            MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
            String fnameCompare = _output.getFileName();
            if (mcOld.getNonZeros() == 0)
                // no compare required
                fnameCompare = null;
            executeMerge(fnameCompare, _outputFName, srcFnames.toArray(new String[0]), metadata.getInputInfo(), metadata.getOutputInfo(), mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
            // create new output matrix (e.g., to prevent potential export<->read file access conflict
            moNew = new MatrixObject(_output.getValueType(), _outputFName);
            OutputInfo oiOld = metadata.getOutputInfo();
            InputInfo iiOld = metadata.getInputInfo();
            MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
            mc.setNonZeros(_isAccum ? -1 : computeNonZeros(_output, inMO));
            MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
            moNew.setMetaData(meta);
        } else {
            // return old matrix, to prevent copy
            moNew = _output;
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 43 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteMRParForDP.

private void executeRemoteMRParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    /* Step 0) check and recompile MR inst
		 * Step 1) serialize child PB and inst
		 * Step 2) create and serialize tasks
		 * Step 3) submit MR Jobs and wait for results
		 * Step 4) collect results from each parallel worker
		 */
    Timing time = (_monitor ? new Timing(true) : null);
    // Step 0) check and compile to CP (if forced remote parfor)
    boolean flagForced = checkMRAndRecompileToCP(0);
    // Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    MatrixObject inputMatrix = ec.getMatrixObject(_colocatedDPMatrix);
    PartitionFormat inputDPF = sb.determineDataPartitionFormat(_colocatedDPMatrix);
    // mark matrix var as partitioned
    inputMatrix.setPartitioned(inputDPF._dpf, inputDPF._N);
    // Step 2) init parallel workers (serialize PBs)
    // NOTES: each mapper changes filenames with regard to his ID as we submit a single
    // job, cannot reuse serialized string, since variables are serialized as well.
    ParForBody body = new ParForBody(_childBlocks, _resultVars, ec);
    String program = ProgramConverter.serializeParForBody(body);
    if (_monitor)
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, time.stop());
    // Step 3) create tasks
    TaskPartitioner partitioner = createTaskPartitioner(from, to, incr);
    String resultFile = constructResultFileName();
    long numIterations = partitioner.getNumIterations();
    // partitioner.createTasks().size();
    long numCreatedTasks = numIterations;
    if (_monitor)
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, time.stop());
    // write matrices to HDFS
    exportMatricesToHDFS(ec);
    // Step 4) submit MR job (wait for finished work)
    OutputInfo inputOI = ((inputMatrix.getSparsity() < 0.1 && inputDPF == PartitionFormat.COLUMN_WISE) || (inputMatrix.getSparsity() < 0.001 && inputDPF == PartitionFormat.ROW_WISE)) ? OutputInfo.BinaryCellOutputInfo : OutputInfo.BinaryBlockOutputInfo;
    RemoteParForJobReturn ret = RemoteDPParForMR.runJob(_ID, _iterPredVar, _colocatedDPMatrix, program, resultFile, inputMatrix, inputDPF, inputOI, _tSparseCol, _enableCPCaching, _numThreads, _replicationDP);
    if (_monitor)
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, time.stop());
    // Step 5) collecting results from each parallel worker
    int numExecutedTasks = ret.getNumExecutedTasks();
    int numExecutedIterations = ret.getNumExecutedIterations();
    // consolidate results into global symbol table
    consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, ret.getVariables());
    if (// see step 0
    flagForced)
        releaseForcedRecompile(0);
    inputMatrix.unsetPartitioned();
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, time.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 44 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteSparkParForDP.

private void executeRemoteSparkParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    Timing time = (_monitor ? new Timing(true) : null);
    // Step 0) check and compile to CP (if forced remote parfor)
    boolean flagForced = checkMRAndRecompileToCP(0);
    // Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    MatrixObject inputMatrix = ec.getMatrixObject(_colocatedDPMatrix);
    PartitionFormat inputDPF = sb.determineDataPartitionFormat(_colocatedDPMatrix);
    // mark matrix var as partitioned
    inputMatrix.setPartitioned(inputDPF._dpf, inputDPF._N);
    // Step 2) init parallel workers (serialize PBs)
    // NOTES: each mapper changes filenames with regard to his ID as we submit a single
    // job, cannot reuse serialized string, since variables are serialized as well.
    ParForBody body = new ParForBody(_childBlocks, _resultVars, ec);
    HashMap<String, byte[]> clsMap = new HashMap<>();
    String program = ProgramConverter.serializeParForBody(body, clsMap);
    if (_monitor)
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, time.stop());
    // Step 3) create tasks
    TaskPartitioner partitioner = createTaskPartitioner(from, to, incr);
    String resultFile = constructResultFileName();
    long numIterations = partitioner.getNumIterations();
    // partitioner.createTasks().size();
    long numCreatedTasks = numIterations;
    if (_monitor)
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, time.stop());
    // write matrices to HDFS, except DP matrix which is the input to the RemoteDPParForSpark job
    exportMatricesToHDFS(ec, _colocatedDPMatrix);
    // Step 4) submit MR job (wait for finished work)
    // TODO runtime support for binary cell partitioning
    OutputInfo inputOI = OutputInfo.BinaryBlockOutputInfo;
    RemoteParForJobReturn ret = RemoteDPParForSpark.runJob(_ID, _iterPredVar, _colocatedDPMatrix, program, clsMap, resultFile, inputMatrix, ec, inputDPF, inputOI, _tSparseCol, _enableCPCaching, _numThreads);
    if (_monitor)
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, time.stop());
    // Step 5) collecting results from each parallel worker
    int numExecutedTasks = ret.getNumExecutedTasks();
    int numExecutedIterations = ret.getNumExecutedIterations();
    // consolidate results into global symbol table
    consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, ret.getVariables());
    if (// see step 0
    flagForced)
        releaseForcedRecompile(0);
    inputMatrix.unsetPartitioned();
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, time.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) HashMap(java.util.HashMap) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 45 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class VariableCPInstruction method parseInstruction.

public static VariableCPInstruction parseInstruction(String str) {
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = parts[0];
    VariableOperationCode voc = getVariableOperationCode(opcode);
    if (voc == VariableOperationCode.CreateVariable) {
        if (// && parts.length != 10 )
        parts.length < 5)
            throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
    } else if (voc == VariableOperationCode.MoveVariable) {
        // mvvar tempA A; or mvvar mvar5 "data/out.mtx" "binary"
        if (parts.length != 3 && parts.length != 4)
            throw new DMLRuntimeException("Invalid number of operands in mvvar instruction: " + str);
    } else if (voc == VariableOperationCode.Write) {
        // Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
        if (parts.length != 5 && parts.length != 8)
            throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
    } else {
        if (voc != VariableOperationCode.RemoveVariable)
            // no output
            InstructionUtils.checkNumFields(parts, getArity(voc));
    }
    CPOperand in1 = null, in2 = null, in3 = null, in4 = null, out = null;
    switch(voc) {
        case CreateVariable:
            // variable name
            DataType dt = DataType.valueOf(parts[4]);
            ValueType vt = dt == DataType.MATRIX ? ValueType.DOUBLE : ValueType.STRING;
            int extSchema = (dt == DataType.FRAME && parts.length >= 13) ? 1 : 0;
            in1 = new CPOperand(parts[1], vt, dt);
            // file name
            in2 = new CPOperand(parts[2], ValueType.STRING, DataType.SCALAR);
            // file name override flag (always literal)
            in3 = new CPOperand(parts[3], ValueType.BOOLEAN, DataType.SCALAR);
            // format
            String fmt = parts[5];
            if (fmt.equalsIgnoreCase("csv")) {
                // 14 inputs: createvar corresponding to READ -- includes properties hasHeader, delim, fill, and fillValue
                if (parts.length < 15 + extSchema || parts.length > 17 + extSchema)
                    throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
            } else {
                if (parts.length != 6 && parts.length != 12 + extSchema)
                    throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
            }
            OutputInfo oi = OutputInfo.stringToOutputInfo(fmt);
            InputInfo ii = OutputInfo.getMatchingInputInfo(oi);
            MatrixCharacteristics mc = new MatrixCharacteristics();
            if (parts.length == 6) {
            // do nothing
            } else if (parts.length >= 11) {
                // matrix characteristics
                mc.setDimension(Long.parseLong(parts[6]), Long.parseLong(parts[7]));
                mc.setBlockSize(Integer.parseInt(parts[8]), Integer.parseInt(parts[9]));
                mc.setNonZeros(Long.parseLong(parts[10]));
            } else {
                throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
            }
            MetaDataFormat iimd = new MetaDataFormat(mc, oi, ii);
            UpdateType updateType = UpdateType.COPY;
            if (parts.length >= 12)
                updateType = UpdateType.valueOf(parts[11].toUpperCase());
            // handle frame schema
            String schema = (dt == DataType.FRAME && parts.length >= 13) ? parts[parts.length - 1] : null;
            if (fmt.equalsIgnoreCase("csv")) {
                // Cretevar instructions for CSV format either has 13 or 14 inputs.
                // 13 inputs: createvar corresponding to WRITE -- includes properties hasHeader, delim, and sparse
                // 14 inputs: createvar corresponding to READ -- includes properties hasHeader, delim, fill, and fillValue
                FileFormatProperties fmtProperties = null;
                if (parts.length == 15 + extSchema) {
                    boolean hasHeader = Boolean.parseBoolean(parts[12]);
                    String delim = parts[13];
                    boolean sparse = Boolean.parseBoolean(parts[14]);
                    fmtProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
                } else {
                    boolean hasHeader = Boolean.parseBoolean(parts[12]);
                    String delim = parts[13];
                    boolean fill = Boolean.parseBoolean(parts[14]);
                    double fillValue = UtilFunctions.parseToDouble(parts[15]);
                    String naStrings = null;
                    if (parts.length == 17 + extSchema)
                        naStrings = parts[16];
                    fmtProperties = new CSVFileFormatProperties(hasHeader, delim, fill, fillValue, naStrings);
                }
                return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, updateType, fmtProperties, schema, opcode, str);
            } else {
                return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, updateType, schema, opcode, str);
            }
        case AssignVariable:
            in1 = new CPOperand(parts[1]);
            in2 = new CPOperand(parts[2]);
            break;
        case CopyVariable:
            // Value types are not given here
            in1 = new CPOperand(parts[1], ValueType.UNKNOWN, DataType.UNKNOWN);
            in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
            break;
        case MoveVariable:
            in1 = new CPOperand(parts[1], ValueType.UNKNOWN, DataType.UNKNOWN);
            in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
            if (parts.length > 3)
                in3 = new CPOperand(parts[3], ValueType.UNKNOWN, DataType.UNKNOWN);
            break;
        case RemoveVariable:
            VariableCPInstruction rminst = new VariableCPInstruction(getVariableOperationCode(opcode), null, null, null, out, opcode, str);
            for (int i = 1; i < parts.length; i++) rminst.addInput(new CPOperand(parts[i], ValueType.UNKNOWN, DataType.SCALAR));
            return rminst;
        case RemoveVariableAndFile:
            in1 = new CPOperand(parts[1]);
            in2 = new CPOperand(parts[2]);
            // second argument must be a boolean
            if (in2.getValueType() != ValueType.BOOLEAN)
                throw new DMLRuntimeException("Unexpected value type for second argument in: " + str);
            break;
        case CastAsScalarVariable:
        case CastAsMatrixVariable:
        case CastAsFrameVariable:
        case CastAsDoubleVariable:
        case CastAsIntegerVariable:
        case CastAsBooleanVariable:
            // first operand is a variable name => string value type
            in1 = new CPOperand(parts[1]);
            // output variable name
            out = new CPOperand(parts[2]);
            break;
        case Write:
            in1 = new CPOperand(parts[1]);
            in2 = new CPOperand(parts[2]);
            in3 = new CPOperand(parts[3]);
            FileFormatProperties fprops = null;
            if (in3.getName().equalsIgnoreCase("csv")) {
                boolean hasHeader = Boolean.parseBoolean(parts[4]);
                String delim = parts[5];
                boolean sparse = Boolean.parseBoolean(parts[6]);
                fprops = new CSVFileFormatProperties(hasHeader, delim, sparse);
                // description
                in4 = new CPOperand(parts[7]);
            } else {
                fprops = new FileFormatProperties();
                // description
                in4 = new CPOperand(parts[4]);
            }
            VariableCPInstruction inst = new VariableCPInstruction(getVariableOperationCode(opcode), in1, in2, in3, out, null, fprops, null, null, opcode, str);
            inst.addInput(in4);
            return inst;
        case Read:
            in1 = new CPOperand(parts[1]);
            in2 = new CPOperand(parts[2]);
            out = null;
            break;
        case SetFileName:
            // variable name
            in1 = new CPOperand(parts[1]);
            // file name
            in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
            // option: remote or local
            in3 = new CPOperand(parts[3], ValueType.UNKNOWN, DataType.UNKNOWN);
            // return new VariableCPInstruction(getVariableOperationCode(opcode), in1, in2, in3, str);
            break;
    }
    return new VariableCPInstruction(getVariableOperationCode(opcode), in1, in2, in3, out, opcode, str);
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) ValueType(org.apache.sysml.parser.Expression.ValueType) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FileFormatProperties(org.apache.sysml.runtime.matrix.data.FileFormatProperties) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) DataType(org.apache.sysml.parser.Expression.DataType)

Aggregations

OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)69 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)34 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)30 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)28 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)25 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)22 IOException (java.io.IOException)16 ValueType (org.apache.sysml.parser.Expression.ValueType)10 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)10 HashMap (java.util.HashMap)6 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)6 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)5 Matrix (org.apache.sysml.udf.Matrix)5 Scalar (org.apache.sysml.udf.Scalar)5 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 JobConf (org.apache.hadoop.mapred.JobConf)4 RunningJob (org.apache.hadoop.mapred.RunningJob)4 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)4 LopsException (org.apache.sysml.lops.LopsException)4