Search in sources :

Example 46 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class ProgramConverter method parseDataObject.

/**
 * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
 * related symbol tables can be hierarchically structured.
 *
 * @param in data object as string
 * @return array of objects
 */
public static Object[] parseDataObject(String in) {
    Object[] ret = new Object[2];
    StringTokenizer st = new StringTokenizer(in, DATA_FIELD_DELIM);
    String name = st.nextToken();
    DataType datatype = DataType.valueOf(st.nextToken());
    ValueType valuetype = ValueType.valueOf(st.nextToken());
    String valString = st.hasMoreTokens() ? st.nextToken() : "";
    Data dat = null;
    switch(datatype) {
        case SCALAR:
            {
                switch(valuetype) {
                    case INT:
                        dat = new IntObject(Long.parseLong(valString));
                        break;
                    case DOUBLE:
                        dat = new DoubleObject(Double.parseDouble(valString));
                        break;
                    case BOOLEAN:
                        dat = new BooleanObject(Boolean.parseBoolean(valString));
                        break;
                    case STRING:
                        dat = new StringObject(valString);
                        break;
                    default:
                        throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
                }
                break;
            }
        case MATRIX:
            {
                MatrixObject mo = new MatrixObject(valuetype, valString);
                long rows = Long.parseLong(st.nextToken());
                long cols = Long.parseLong(st.nextToken());
                int brows = Integer.parseInt(st.nextToken());
                int bcols = Integer.parseInt(st.nextToken());
                long nnz = Long.parseLong(st.nextToken());
                InputInfo iin = InputInfo.stringToInputInfo(st.nextToken());
                OutputInfo oin = OutputInfo.stringToOutputInfo(st.nextToken());
                PartitionFormat partFormat = PartitionFormat.valueOf(st.nextToken());
                UpdateType inplace = UpdateType.valueOf(st.nextToken());
                MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, brows, bcols, nnz);
                MetaDataFormat md = new MetaDataFormat(mc, oin, iin);
                mo.setMetaData(md);
                if (partFormat._dpf != PDataPartitionFormat.NONE)
                    mo.setPartitioned(partFormat._dpf, partFormat._N);
                mo.setUpdateType(inplace);
                dat = mo;
                break;
            }
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
    }
    ret[0] = name;
    ret[1] = dat;
    return ret;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) Data(org.apache.sysml.runtime.instructions.cp.Data) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) StringTokenizer(java.util.StringTokenizer) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) DataType(org.apache.sysml.parser.Expression.DataType) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject)

Example 47 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class ResultMergeLocalFile method createNewMatrixObject.

private MatrixObject createNewMatrixObject(MatrixObject output, ArrayList<MatrixObject> inMO) {
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(_output.getValueType(), _outputFName);
    // create deep copy of metadata obj
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
    mc.setNonZeros(_isAccum ? -1 : computeNonZeros(output, inMO));
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 48 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class ResultMergeLocalMemory method createNewMatrixObject.

private MatrixObject createNewMatrixObject(MatrixBlock data) {
    ValueType vt = _output.getValueType();
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(vt, _outputFName);
    // create deep copy of metadata obj
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
    mc.setNonZeros(data.getNonZeros());
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    // adjust dense/sparse representation
    data.examSparsity();
    // release new output
    moNew.acquireModify(data);
    moNew.release();
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ValueType(org.apache.sysml.parser.Expression.ValueType) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 49 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class ResultMergeRemoteMR method executeParallelMerge.

@Override
public MatrixObject executeParallelMerge(int par) {
    // always create new matrix object (required for nested parallelism)
    MatrixObject moNew = null;
    if (LOG.isTraceEnabled())
        LOG.trace("ResultMerge (remote, mr): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    try {
        // collect all relevant inputs
        Collection<String> srcFnames = new LinkedList<>();
        ArrayList<MatrixObject> inMO = new ArrayList<>();
        for (MatrixObject in : _inputs) {
            // check for empty inputs (no iterations executed)
            if (in != null && in != _output) {
                // ensure that input file resides on disk
                in.exportData();
                // add to merge list
                srcFnames.add(in.getFileName());
                inMO.add(in);
            }
        }
        if (!srcFnames.isEmpty()) {
            // ensure that outputfile (for comparison) resides on disk
            _output.exportData();
            // actual merge
            MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
            MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
            String fnameCompare = _output.getFileName();
            if (mcOld.getNonZeros() == 0)
                // no compare required
                fnameCompare = null;
            executeMerge(fnameCompare, _outputFName, srcFnames.toArray(new String[0]), metadata.getInputInfo(), metadata.getOutputInfo(), mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
            // create new output matrix (e.g., to prevent potential export<->read file access conflict
            moNew = new MatrixObject(_output.getValueType(), _outputFName);
            OutputInfo oiOld = metadata.getOutputInfo();
            InputInfo iiOld = metadata.getInputInfo();
            MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
            mc.setNonZeros(_isAccum ? -1 : computeNonZeros(_output, inMO));
            MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
            moNew.setMetaData(meta);
        } else {
            // return old matrix, to prevent copy
            moNew = _output;
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 50 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class ResultMergeRemoteSpark method executeMerge.

@SuppressWarnings("unchecked")
protected RDDObject executeMerge(MatrixObject compare, MatrixObject[] inputs, long rlen, long clen, int brlen, int bclen) {
    String jobname = "ParFor-RMSP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sec = (SparkExecutionContext) _ec;
    boolean withCompare = (compare != null);
    RDDObject ret = null;
    // determine degree of parallelism
    int numRed = (int) determineNumReducers(rlen, clen, brlen, bclen, _numReducers);
    // sanity check for empty src files
    if (inputs == null || inputs.length == 0)
        throw new DMLRuntimeException("Execute merge should never be called with no inputs.");
    try {
        // note: initial implementation via union over all result rdds discarded due to
        // stack overflow errors with many parfor tasks, and thus many rdds
        // Step 1: construct input rdd from all result files of parfor workers
        // a) construct job conf with all files
        InputInfo ii = InputInfo.BinaryBlockInputInfo;
        JobConf job = new JobConf(ResultMergeRemoteMR.class);
        job.setJobName(jobname);
        job.setInputFormat(ii.inputFormatClass);
        Path[] paths = new Path[inputs.length];
        for (int i = 0; i < paths.length; i++) {
            // ensure input exists on hdfs (e.g., if in-memory or RDD)
            inputs[i].exportData();
            paths[i] = new Path(inputs[i].getFileName());
            // update rdd handle to allow lazy evaluation by guarding
            // against cleanup of temporary result files
            setRDDHandleForMerge(inputs[i], sec);
        }
        FileInputFormat.setInputPaths(job, paths);
        // b) create rdd from input files w/ deep copy of keys and blocks
        JavaPairRDD<MatrixIndexes, MatrixBlock> rdd = sec.getSparkContext().hadoopRDD(job, ii.inputFormatClass, ii.inputKeyClass, ii.inputValueClass).mapPartitionsToPair(new CopyBlockPairFunction(true), true);
        // Step 2a: merge with compare
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
        if (withCompare) {
            JavaPairRDD<MatrixIndexes, MatrixBlock> compareRdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) sec.getRDDHandleForMatrixObject(compare, InputInfo.BinaryBlockInputInfo);
            // merge values which differ from compare values
            ResultMergeRemoteSparkWCompare cfun = new ResultMergeRemoteSparkWCompare(_isAccum);
            out = // group all result blocks per key
            rdd.groupByKey(numRed).join(// join compare block and result blocks
            compareRdd).mapToPair(// merge result blocks w/ compare
            cfun);
        } else // Step 2b: merge without compare
        {
            // direct merge in any order (disjointness guaranteed)
            out = _isAccum ? RDDAggregateUtils.sumByKeyStable(rdd, false) : RDDAggregateUtils.mergeByKey(rdd, false);
        }
        // Step 3: create output rdd handle w/ lineage
        ret = new RDDObject(out);
        for (int i = 0; i < paths.length; i++) ret.addLineageChild(inputs[i].getRDDHandle());
        if (withCompare)
            ret.addLineageChild(compare.getRDDHandle());
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) CopyBlockPairFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyBlockPairFunction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)74 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)38 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)30 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)26 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)20 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)20 IOException (java.io.IOException)17 JobConf (org.apache.hadoop.mapred.JobConf)13 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)13 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)12 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)11 RunningJob (org.apache.hadoop.mapred.RunningJob)10 Path (org.apache.hadoop.fs.Path)9 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)9 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)9 DMLConfig (org.apache.sysml.conf.DMLConfig)8 ValueType (org.apache.sysml.parser.Expression.ValueType)8 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)7 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)7 Group (org.apache.hadoop.mapred.Counters.Group)6