Search in sources :

Example 31 with Data

use of org.apache.sysml.runtime.instructions.cp.Data in project incubator-systemml by apache.

the class Recompiler method rUpdateStatistics.

public static void rUpdateStatistics(Hop hop, LocalVariableMap vars) {
    if (hop.isVisited())
        return;
    // recursively process children
    if (hop.getInput() != null)
        for (Hop c : hop.getInput()) rUpdateStatistics(c, vars);
    boolean updatedSizeExpr = false;
    // (with awareness not to override persistent reads to an existing name)
    if (hop instanceof DataOp && ((DataOp) hop).getDataOpType() != DataOpTypes.PERSISTENTREAD) {
        DataOp d = (DataOp) hop;
        String varName = d.getName();
        if (vars.keySet().contains(varName)) {
            Data dat = vars.get(varName);
            if (dat instanceof MatrixObject) {
                MatrixObject mo = (MatrixObject) dat;
                d.setDim1(mo.getNumRows());
                d.setDim2(mo.getNumColumns());
                d.setNnz(mo.getNnz());
            } else if (dat instanceof FrameObject) {
                FrameObject fo = (FrameObject) dat;
                d.setDim1(fo.getNumRows());
                d.setDim2(fo.getNumColumns());
            }
        }
    } else // special case for persistent reads with unknown size (read-after-write)
    if (hop instanceof DataOp && ((DataOp) hop).getDataOpType() == DataOpTypes.PERSISTENTREAD && !hop.dimsKnown() && ((DataOp) hop).getInputFormatType() != FileFormatTypes.CSV && !ConfigurationManager.getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA)) {
        // update hop with read meta data
        DataOp dop = (DataOp) hop;
        tryReadMetaDataFileMatrixCharacteristics(dop);
    } else // update size expression for rand/seq according to symbol table entries
    if (hop instanceof DataGenOp) {
        DataGenOp d = (DataGenOp) hop;
        HashMap<String, Integer> params = d.getParamIndexMap();
        if (d.getOp() == DataGenMethod.RAND || d.getOp() == DataGenMethod.SINIT || d.getOp() == DataGenMethod.SAMPLE) {
            boolean initUnknown = !d.dimsKnown();
            int ix1 = params.get(DataExpression.RAND_ROWS);
            int ix2 = params.get(DataExpression.RAND_COLS);
            // update rows/cols by evaluating simple expression of literals, nrow, ncol, scalars, binaryops
            HashMap<Long, Long> memo = new HashMap<>();
            d.refreshRowsParameterInformation(d.getInput().get(ix1), vars, memo);
            d.refreshColsParameterInformation(d.getInput().get(ix2), vars, memo);
            updatedSizeExpr = initUnknown & d.dimsKnown();
        } else if (d.getOp() == DataGenMethod.SEQ) {
            boolean initUnknown = !d.dimsKnown();
            int ix1 = params.get(Statement.SEQ_FROM);
            int ix2 = params.get(Statement.SEQ_TO);
            int ix3 = params.get(Statement.SEQ_INCR);
            HashMap<Long, Double> memo = new HashMap<>();
            double from = d.computeBoundsInformation(d.getInput().get(ix1), vars, memo);
            double to = d.computeBoundsInformation(d.getInput().get(ix2), vars, memo);
            double incr = d.computeBoundsInformation(d.getInput().get(ix3), vars, memo);
            // special case increment
            if (from != Double.MAX_VALUE && to != Double.MAX_VALUE) {
                incr *= ((from > to && incr > 0) || (from < to && incr < 0)) ? -1.0 : 1.0;
            }
            if (from != Double.MAX_VALUE && to != Double.MAX_VALUE && incr != Double.MAX_VALUE) {
                d.setDim1(UtilFunctions.getSeqLength(from, to, incr));
                d.setDim2(1);
                d.setIncrementValue(incr);
            }
            updatedSizeExpr = initUnknown & d.dimsKnown();
        } else {
            throw new DMLRuntimeException("Unexpected data generation method: " + d.getOp());
        }
    } else // update size expression for reshape according to symbol table entries
    if (hop instanceof ReorgOp && ((ReorgOp) (hop)).getOp() == Hop.ReOrgOp.RESHAPE) {
        ReorgOp d = (ReorgOp) hop;
        boolean initUnknown = !d.dimsKnown();
        HashMap<Long, Long> memo = new HashMap<>();
        d.refreshRowsParameterInformation(d.getInput().get(1), vars, memo);
        d.refreshColsParameterInformation(d.getInput().get(2), vars, memo);
        updatedSizeExpr = initUnknown & d.dimsKnown();
    } else // update size expression for indexing according to symbol table entries
    if (hop instanceof IndexingOp) {
        IndexingOp iop = (IndexingOp) hop;
        // inpRowL
        Hop input2 = iop.getInput().get(1);
        // inpRowU
        Hop input3 = iop.getInput().get(2);
        // inpColL
        Hop input4 = iop.getInput().get(3);
        // inpColU
        Hop input5 = iop.getInput().get(4);
        boolean initUnknown = !iop.dimsKnown();
        HashMap<Long, Double> memo = new HashMap<>();
        double rl = iop.computeBoundsInformation(input2, vars, memo);
        double ru = iop.computeBoundsInformation(input3, vars, memo);
        double cl = iop.computeBoundsInformation(input4, vars, memo);
        double cu = iop.computeBoundsInformation(input5, vars, memo);
        if (rl != Double.MAX_VALUE && ru != Double.MAX_VALUE)
            iop.setDim1((long) (ru - rl + 1));
        if (cl != Double.MAX_VALUE && cu != Double.MAX_VALUE)
            iop.setDim2((long) (cu - cl + 1));
        updatedSizeExpr = initUnknown & iop.dimsKnown();
    }
    // without overwriting inferred size expressions
    if (!updatedSizeExpr) {
        hop.refreshSizeInformation();
    }
    hop.setVisited();
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) HashMap(java.util.HashMap) Hop(org.apache.sysml.hops.Hop) CacheableData(org.apache.sysml.runtime.controlprogram.caching.CacheableData) Data(org.apache.sysml.runtime.instructions.cp.Data) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IndexingOp(org.apache.sysml.hops.IndexingOp) DataGenOp(org.apache.sysml.hops.DataGenOp) ReorgOp(org.apache.sysml.hops.ReorgOp) DataOp(org.apache.sysml.hops.DataOp)

Example 32 with Data

use of org.apache.sysml.runtime.instructions.cp.Data in project incubator-systemml by apache.

the class ProgramConverter method createDeepCopyExecutionContext.

// //////////////////////////////
// CREATION of DEEP COPIES
// //////////////////////////////
/**
 * Creates a deep copy of the given execution context.
 * For rt_platform=Hadoop, execution context has a symbol table.
 *
 * @param ec execution context
 * @return execution context
 * @throws CloneNotSupportedException if CloneNotSupportedException occurs
 */
public static ExecutionContext createDeepCopyExecutionContext(ExecutionContext ec) throws CloneNotSupportedException {
    ExecutionContext cpec = ExecutionContextFactory.createContext(false, ec.getProgram());
    cpec.setVariables((LocalVariableMap) ec.getVariables().clone());
    // (each worker requires its own copy of the empty matrix object)
    for (String var : cpec.getVariables().keySet()) {
        Data dat = cpec.getVariables().get(var);
        if (dat instanceof MatrixObject && ((MatrixObject) dat).getUpdateType().isInPlace()) {
            MatrixObject mo = (MatrixObject) dat;
            MatrixObject moNew = new MatrixObject(mo);
            if (mo.getNnz() != 0) {
                // If output matrix is not empty (NNZ != 0), then local copy is created so that
                // update in place operation can be applied.
                MatrixBlock mbVar = mo.acquireRead();
                moNew.acquireModify(new MatrixBlock(mbVar));
                mo.release();
            } else {
                // create empty matrix block w/ dense representation (preferred for update in-place)
                // Creating a dense matrix block is valid because empty block not allocated and transfer
                // to sparse representation happens in left indexing in place operation.
                moNew.acquireModify(new MatrixBlock((int) mo.getNumRows(), (int) mo.getNumColumns(), false));
            }
            moNew.release();
            cpec.setVariable(var, moNew);
        }
    }
    return cpec;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ExecutionContext(org.apache.sysml.runtime.controlprogram.context.ExecutionContext) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) Data(org.apache.sysml.runtime.instructions.cp.Data)

Example 33 with Data

use of org.apache.sysml.runtime.instructions.cp.Data in project incubator-systemml by apache.

the class ProgramConverter method parseDataObject.

/**
 * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
 * related symbol tables can be hierarchically structured.
 *
 * @param in data object as string
 * @return array of objects
 */
public static Object[] parseDataObject(String in) {
    Object[] ret = new Object[2];
    StringTokenizer st = new StringTokenizer(in, DATA_FIELD_DELIM);
    String name = st.nextToken();
    DataType datatype = DataType.valueOf(st.nextToken());
    ValueType valuetype = ValueType.valueOf(st.nextToken());
    String valString = st.hasMoreTokens() ? st.nextToken() : "";
    Data dat = null;
    switch(datatype) {
        case SCALAR:
            {
                switch(valuetype) {
                    case INT:
                        dat = new IntObject(Long.parseLong(valString));
                        break;
                    case DOUBLE:
                        dat = new DoubleObject(Double.parseDouble(valString));
                        break;
                    case BOOLEAN:
                        dat = new BooleanObject(Boolean.parseBoolean(valString));
                        break;
                    case STRING:
                        dat = new StringObject(valString);
                        break;
                    default:
                        throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
                }
                break;
            }
        case MATRIX:
            {
                MatrixObject mo = new MatrixObject(valuetype, valString);
                long rows = Long.parseLong(st.nextToken());
                long cols = Long.parseLong(st.nextToken());
                int brows = Integer.parseInt(st.nextToken());
                int bcols = Integer.parseInt(st.nextToken());
                long nnz = Long.parseLong(st.nextToken());
                InputInfo iin = InputInfo.stringToInputInfo(st.nextToken());
                OutputInfo oin = OutputInfo.stringToOutputInfo(st.nextToken());
                PartitionFormat partFormat = PartitionFormat.valueOf(st.nextToken());
                UpdateType inplace = UpdateType.valueOf(st.nextToken());
                MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, brows, bcols, nnz);
                MetaDataFormat md = new MetaDataFormat(mc, oin, iin);
                mo.setMetaData(md);
                if (partFormat._dpf != PDataPartitionFormat.NONE)
                    mo.setPartitioned(partFormat._dpf, partFormat._N);
                mo.setUpdateType(inplace);
                dat = mo;
                break;
            }
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
    }
    ret[0] = name;
    ret[1] = dat;
    return ret;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) Data(org.apache.sysml.runtime.instructions.cp.Data) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) StringTokenizer(java.util.StringTokenizer) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) DataType(org.apache.sysml.parser.Expression.DataType) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject)

Example 34 with Data

use of org.apache.sysml.runtime.instructions.cp.Data in project incubator-systemml by apache.

the class RemoteParForUtils method exportResultVariables.

/**
 * For remote Spark parfor workers. This is a simplified version compared to MR.
 *
 * @param workerID worker id
 * @param vars local variable map
 * @param resultVars list of result variables
 * @return list of result variables
 * @throws IOException if IOException occurs
 */
public static ArrayList<String> exportResultVariables(long workerID, LocalVariableMap vars, ArrayList<ResultVar> resultVars) throws IOException {
    ArrayList<String> ret = new ArrayList<>();
    // foreach result variables probe if export necessary
    for (ResultVar rvar : resultVars) {
        Data dat = vars.get(rvar._name);
        // export output variable to HDFS (see RunMRJobs)
        if (dat != null && dat.getDataType() == DataType.MATRIX) {
            MatrixObject mo = (MatrixObject) dat;
            if (mo.isDirty()) {
                // export result var (iff actually modified in parfor)
                mo.exportData();
                // pass output vars (scalars by value, matrix by ref) to result
                // (only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
                ret.add(ProgramConverter.serializeDataObject(rvar._name, mo));
            }
        }
    }
    return ret;
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ArrayList(java.util.ArrayList) CacheableData(org.apache.sysml.runtime.controlprogram.caching.CacheableData) Data(org.apache.sysml.runtime.instructions.cp.Data)

Example 35 with Data

use of org.apache.sysml.runtime.instructions.cp.Data in project incubator-systemml by apache.

the class RemoteParForUtils method exportResultVariables.

/**
 * For remote MR parfor workers.
 *
 * @param workerID worker id
 * @param vars local variable map
 * @param resultVars list of result variables
 * @param rvarFnames ?
 * @param out output collectors
 * @throws IOException if IOException occurs
 */
public static void exportResultVariables(long workerID, LocalVariableMap vars, ArrayList<ResultVar> resultVars, HashMap<String, String> rvarFnames, OutputCollector<Writable, Writable> out) throws IOException {
    // create key and value for reuse
    LongWritable okey = new LongWritable(workerID);
    Text ovalue = new Text();
    // foreach result variables probe if export necessary
    for (ResultVar rvar : resultVars) {
        Data dat = vars.get(rvar._name);
        // export output variable to HDFS (see RunMRJobs)
        if (dat != null && dat.getDataType() == DataType.MATRIX) {
            MatrixObject mo = (MatrixObject) dat;
            if (mo.isDirty()) {
                if (ParForProgramBlock.ALLOW_REUSE_MR_PAR_WORKER && rvarFnames != null) {
                    String fname = rvarFnames.get(rvar._name);
                    if (fname != null)
                        mo.setFileName(fname);
                    // export result var (iff actually modified in parfor)
                    // note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)
                    mo.exportData();
                    rvarFnames.put(rvar._name, mo.getFileName());
                } else {
                    // export result var (iff actually modified in parfor)
                    // note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)
                    mo.exportData();
                }
                // pass output vars (scalars by value, matrix by ref) to result
                // (only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
                String datStr = ProgramConverter.serializeDataObject(rvar._name, mo);
                ovalue.set(datStr);
                out.collect(okey, ovalue);
            }
        }
    }
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) Text(org.apache.hadoop.io.Text) CacheableData(org.apache.sysml.runtime.controlprogram.caching.CacheableData) Data(org.apache.sysml.runtime.instructions.cp.Data) LongWritable(org.apache.hadoop.io.LongWritable)

Aggregations

Data (org.apache.sysml.runtime.instructions.cp.Data)47 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)37 CacheableData (org.apache.sysml.runtime.controlprogram.caching.CacheableData)11 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)10 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)9 ResultVar (org.apache.sysml.parser.ParForStatementBlock.ResultVar)8 ArrayList (java.util.ArrayList)7 DataIdentifier (org.apache.sysml.parser.DataIdentifier)7 DataOp (org.apache.sysml.hops.DataOp)6 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)6 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)6 LiteralOp (org.apache.sysml.hops.LiteralOp)5 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)5 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)5 DMLException (org.apache.sysml.api.DMLException)4 ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock)4 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)4 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)4 HashMap (java.util.HashMap)3 Hop (org.apache.sysml.hops.Hop)3