Search in sources :

Example 26 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class ParameterizedBuiltinOp method inferOutputCharacteristics.

/**
 * Infers worst-case output characteristics of this parameterized builtin
 * from the (possibly memoized) statistics of its target input.
 *
 * @param memo memo table used to look up statistics of input hops
 * @return array of the form {rows, cols, nnz}, or null if nothing could be
 *         inferred for the current operation type and inputs
 */
@Override
protected long[] inferOutputCharacteristics(MemoTable memo) {
    //Notes: CDF, TOSTRING always known because scalar outputs
    long[] ret = null;
    Hop input = getTargetHop();
    MatrixCharacteristics mc = memo.getAllInputStats(input);
    if (_op == ParamBuiltinOp.GROUPEDAGG) {
        // Get the number of groups provided as part of aggregate() invocation, whenever available.
        if (_paramIndexMap.get(Statement.GAGG_NUM_GROUPS) != null) {
            Hop ngroups = getInput().get(_paramIndexMap.get(Statement.GAGG_NUM_GROUPS));
            // instanceof already evaluates to false for null, no extra null check required
            if (ngroups instanceof LiteralOp) {
                long m = HopRewriteUtils.getIntValueSafe((LiteralOp) ngroups);
                long n = (mc.getRows() == 1) ? 1 : mc.getCols();
                return new long[] { m, n, m };
            }
        }
        // Output dimensions are completely data dependent. In the worst case, 
        // #groups = #rows in the grouping attribute (e.g., categorical attribute is an ID column, say EmployeeID).
        // In such a case, #rows in the output = #rows in the input. Also, output sparsity is 
        // likely to be 1.0 (e.g., groupedAgg(groups=<a ID column>, fn="count"))
        long m = mc.getRows();
        long n = (mc.getRows() == 1) ? 1 : mc.getCols();
        if (m >= 1) {
            ret = new long[] { m, n, m };
        }
    } else if (_op == ParamBuiltinOp.RMEMPTY) {
        // The dimension along the margin is bounded by the input dimension, or by the
        // number of non-zeros of the optional select vector if that is known;
        // the other dimension and the number of non-zeros are taken over from the input.
        if (mc.dimsKnown()) {
            // margin defaults to "rows" unless the parameter is the literal "cols"
            Hop marginHop = getInput().get(_paramIndexMap.get("margin"));
            boolean removeCols = marginHop instanceof LiteralOp
                && "cols".equals(((LiteralOp) marginHop).getStringValue());
            MatrixCharacteristics mcSelect = null;
            if (_paramIndexMap.get("select") != null) {
                Hop select = getInput().get(_paramIndexMap.get("select"));
                mcSelect = memo.getAllInputStats(select);
            }
            boolean selectNnzKnown = (mcSelect != null && mcSelect.nnzKnown());
            long lDim1 = 0, lDim2 = 0;
            if (!removeCols) {
                lDim1 = selectNnzKnown ? mcSelect.getNonZeros() : mc.getRows();
                lDim2 = mc.getCols();
            } else {
                lDim1 = mc.getRows();
                lDim2 = selectNnzKnown ? mcSelect.getNonZeros() : mc.getCols();
            }
            ret = new long[] { lDim1, lDim2, mc.getNonZeros() };
        }
    } else if (_op == ParamBuiltinOp.REPLACE) {
        // #nnz depends on the replacement pattern and value, same as input if non-zero
        if (mc.dimsKnown()) {
            if (isNonZeroReplaceArguments())
                ret = new long[] { mc.getRows(), mc.getCols(), mc.getNonZeros() };
            else
                ret = new long[] { mc.getRows(), mc.getCols(), -1 };
        }
    } else if (_op == ParamBuiltinOp.REXPAND) {
        //dimensions are exactly known from input, sparsity unknown but upper bounded by nrow(v)
        //note: cannot infer exact sparsity due to missing cast for outer and potential cutoff for table
        //but very good sparsity estimate possible (number of non-zeros in input)
        Hop max = getInput().get(_paramIndexMap.get("max"));
        Hop dir = getInput().get(_paramIndexMap.get("dir"));
        // only literal parameters can be evaluated here; for non-literal max/dir
        // the output size stays unknown instead of failing with a ClassCastException
        if (mc.dimsKnown() && max instanceof LiteralOp && dir instanceof LiteralOp) {
            double maxVal = HopRewriteUtils.getDoubleValueSafe((LiteralOp) max);
            String dirVal = ((LiteralOp) dir).getStringValue();
            long lnnz = mc.nnzKnown() ? mc.getNonZeros() : mc.getRows();
            if ("cols".equals(dirVal)) {
                //expand horizontally
                ret = new long[] { mc.getRows(), UtilFunctions.toLong(maxVal), lnnz };
            } else if ("rows".equals(dirVal)) {
                //expand vertically
                ret = new long[] { UtilFunctions.toLong(maxVal), mc.getRows(), lnnz };
            }
        }
    } else if (_op == ParamBuiltinOp.TRANSFORMDECODE) {
        if (mc.dimsKnown()) {
            //cols: dummy coding might decrease never increase cols 
            return new long[] { mc.getRows(), mc.getCols(), mc.getRows() * mc.getCols() };
        }
    } else if (_op == ParamBuiltinOp.TRANSFORMAPPLY) {
        if (mc.dimsKnown()) {
            //cols: dummy coding and binning might increase cols but nnz stays constant
            return new long[] { mc.getRows(), mc.getCols(), mc.getRows() * mc.getCols() };
        }
    }
    return ret;
}
Also used : MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 27 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class LeftIndexingOp method inferOutputCharacteristics.

/**
 * Infers worst-case output characteristics of the left indexing operation.
 * The output has the dimensions of the first input; the number of non-zeros
 * is only estimated if the indexing range is constant.
 *
 * @param memo memo table used to look up statistics of input hops
 * @return array of the form {rows, cols, nnz}, or null if the dimensions
 *         of the first input are not known
 */
@Override
protected long[] inferOutputCharacteristics(MemoTable memo) {
    //first input: original matrix, second input: right-hand-side matrix
    Hop target = getInput().get(0);
    Hop source = getInput().get(1);
    MatrixCharacteristics targetStats = memo.getAllInputStats(target);
    MatrixCharacteristics sourceStats = memo.getAllInputStats(source);

    //without known target dimensions nothing can be inferred
    if (!targetStats.dimsKnown())
        return null;

    long rows = targetStats.getRows();
    long cols = targetStats.getCols();
    double outSparsity = OptimizerUtils.getLeftIndexingSparsity(
        rows, cols, targetStats.getNonZeros(),
        sourceStats.getRows(), sourceStats.getCols(), sourceStats.getNonZeros());

    //nnz is reported as unknown (-1) unless the indexing range is constant
    long nnzEstimate;
    if (hasConstantIndexingRange())
        nnzEstimate = (long) (outSparsity * rows * cols);
    else
        nnzEstimate = -1;

    return new long[] { rows, cols, nnzEstimate };
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 28 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class LeftIndexingOp method computeMemEstimate.

/**
 * Computes the memory estimate of this left indexing operation. Starts from
 * the default estimate of the super class and then overrides it in two cases:
 * if the dimensions of the right-hand-side input are unknown, or if the output
 * nnz is unknown and the current estimate is at least the default size.
 *
 * @param memo memo table used to look up statistics of input hops
 */
@Override
public void computeMemEstimate(MemoTable memo) {
    //default hop estimate first, refined below
    super.computeMemEstimate(memo);

    //right-hand-side matrix and its (inferred) statistics
    Hop rhs = getInput().get(1);
    MatrixCharacteristics rhsStats = memo.getAllInputStats(rhs);

    //TODO also use worstcase estimate for output
    //both refinements require known output dimensions
    if (!dimsKnown())
        return;

    if (!rhs.dimsKnown() && !rhsStats.dimsKnown()) {
        //worst-case size for the second input (it cannot be larger than the overall
        //matrix), unless it is a single cell / row vector / column vector
        double rhsSize = _outputMemEstimate;
        if (_rowLowerEqualsUpper || _colLowerEqualsUpper) {
            long rhsRows = _rowLowerEqualsUpper ? 1 : _dim1;
            long rhsCols = _colLowerEqualsUpper ? 1 : _dim2;
            rhsSize = OptimizerUtils.estimateSize(rhsRows, rhsCols);
        }
        //original matrix (left) + new submatrix (right) + output
        _memEstimate = getInputSize(0) + rhsSize + _outputMemEstimate;
    } else if (_nnz < 0 && _memEstimate >= OptimizerUtils.DEFAULT_SIZE) {
        //last attempt to infer a reasonable estimate wrt output sparsity
        //(important for indexing sparse matrices into empty matrices)
        MatrixCharacteristics leftStats = memo.getAllInputStats(getInput().get(0));
        MatrixCharacteristics rightStats = memo.getAllInputStats(getInput().get(1));
        boolean nnzKnown = leftStats.getNonZeros() >= 0 && rightStats.getNonZeros() >= 0;
        if (nnzKnown && hasConstantIndexingRange()) {
            //upper bound on output nnz: sum of the nnz of both inputs
            long nnzBound = leftStats.getNonZeros() + rightStats.getNonZeros();
            _outputMemEstimate = computeOutputMemEstimate(_dim1, _dim2, nnzBound);
            //original matrix (left) + new submatrix (right) + output
            _memEstimate = getInputSize(0) + getInputSize(1) + _outputMemEstimate;
        }
    }
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 29 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class MemoTable method getAllInputStats.

/**
 * Returns the statistics of the given hop. Exact values are read from the hop
 * itself; if {@code input.dimsKnown(true)} is false, missing values are filled
 * in from the entry memoized under the hop ID, when such an entry exists.
 *
 * @param input hop to look up, may be null
 * @return newly created matrix characteristics, or null if input is null
 */
public MatrixCharacteristics getAllInputStats(Hop input) {
    if (input == null)
        return null;

    //exact information as recorded on the hop
    long rows = input.getDim1();
    long cols = input.getDim2();
    long nonZeros = input.getNnz();

    //enrich exact information with worst-case stats where something is missing
    if (!input.dimsKnown(true)) {
        MatrixCharacteristics worstCase = _memo.get(input.getHopID());
        if (worstCase != null) {
            if (rows <= 0)
                rows = worstCase.getRows();
            if (cols <= 0)
                cols = worstCase.getCols();
            if (nonZeros < 0)
                nonZeros = worstCase.getNonZeros();
        }
    }

    //NOTE(review): the two -1 arguments are presumably unknown block sizes - confirm
    return new MatrixCharacteristics(rows, cols, -1, -1, nonZeros);
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 30 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class MemoTable method getAllInputStats.

/**
 * Returns the statistics of all given hops, one entry per input and in the
 * same order. Exact values are read from each hop; if the dimensions of a hop
 * are not known, missing values are filled in from the entry memoized under
 * the hop ID, when such an entry exists.
 *
 * @param inputs hops to look up, may be null
 * @return array of newly created matrix characteristics, or null if inputs is null
 */
public MatrixCharacteristics[] getAllInputStats(ArrayList<Hop> inputs) {
    if (inputs == null)
        return null;
    MatrixCharacteristics[] ret = new MatrixCharacteristics[inputs.size()];
    for (int i = 0; i < inputs.size(); i++) {
        Hop input = inputs.get(i);
        long dim1 = input.getDim1();
        long dim2 = input.getDim2();
        long nnz = input.getNnz();
        //NOTE(review): the single-hop overload checks dimsKnown(true) here - confirm
        //whether this overload should do the same
        if (!input.dimsKnown()) {
            MatrixCharacteristics tmp = _memo.get(input.getHopID());
            if (tmp != null) {
                //enrich exact information with worst-case stats
                dim1 = (dim1 <= 0) ? tmp.getRows() : dim1;
                dim2 = (dim2 <= 0) ? tmp.getCols() : dim2;
                //only a negative nnz is treated as missing; an exact nnz of 0 is kept
                //(consistent with the single-hop overload)
                nnz = (nnz < 0) ? tmp.getNonZeros() : nnz;
            }
        }
        ret[i] = new MatrixCharacteristics(dim1, dim2, -1, -1, nnz);
    }
    return ret;
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)258 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)87 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)85 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)57 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)52 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)48 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)41 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)36 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)35 CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)34 IOException (java.io.IOException)27 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)24 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)21 ArrayList (java.util.ArrayList)19 ValueType (org.apache.sysml.parser.Expression.ValueType)19 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)16 Path (org.apache.hadoop.fs.Path)13 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)13 Test (org.junit.Test)13 LongWritable (org.apache.hadoop.io.LongWritable)12