Search in sources :

Example 71 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class MatrixIndexingCPFileInstruction method processInstruction.

/**
 * Serves a right-indexing request on a partitioned matrix by resolving the
 * index range to its partition file.
 * <p>
 * If the partition file exists on HDFS, a new matrix object pointing at that
 * file is registered as the output variable, with dimensions derived from the
 * partition format of the input. Otherwise the partition is obtained through
 * {@code readMatrixPartition} and bound as the matrix output.
 *
 * @param ec execution context used to look up the input and to bind the output
 * @throws DMLRuntimeException if the input is not partitioned, the opcode is not
 *         the right-indexing opcode, or the partition format is not supported
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final String op = getOpcode();
    final IndexRange range = getIndexRange(ec).add(1);
    final MatrixObject input = ec.getMatrixObject(input1.getName());

    // guard: only right indexing over a partitioned input is handled here
    if (!input.isPartitioned() || !op.equalsIgnoreCase(RightIndex.OPCODE)) {
        throw new DMLRuntimeException("Invalid opcode or index predicate for MatrixIndexingCPFileInstruction: " + instString);
    }

    final MetaDataFormat inMeta = (MetaDataFormat) input.getMetaData();
    final MatrixCharacteristics inMc = inMeta.getMatrixCharacteristics();
    final String partFile = input.getPartitionFileName(range, inMc.getRowsPerBlock(), inMc.getColsPerBlock());

    if (!MapReduceTool.existsFileOnHDFS(partFile)) {
        // no file for this partition: will return an empty matrix partition
        MatrixBlock block = input.readMatrixPartition(range);
        ec.setMatrixOutput(output.getName(), block, getExtendedOpcode());
        return;
    }

    // create the output matrix object on top of the existing partition file
    MatrixObject result = new MatrixObject(input.getValueType(), partFile);

    // the partition format decides which dimension shrinks to one partition
    long partRows;
    long partCols;
    switch (input.getPartitionFormat()) {
        case ROW_WISE:
            partRows = 1;
            partCols = inMc.getCols();
            break;
        case ROW_BLOCK_WISE_N:
            partRows = input.getPartitionSize();
            partCols = inMc.getCols();
            break;
        case COLUMN_WISE:
            partRows = inMc.getRows();
            partCols = 1;
            break;
        case COLUMN_BLOCK_WISE_N:
            partRows = inMc.getRows();
            partCols = input.getPartitionSize();
            break;
        default:
            throw new DMLRuntimeException("Unsupported partition format for CP_FILE " + RightIndex.OPCODE + ": " + input.getPartitionFormat());
    }
    MatrixCharacteristics outMc = new MatrixCharacteristics(partRows, partCols, inMc.getRowsPerBlock(), inMc.getColsPerBlock());
    result.setMetaData(new MetaDataFormat(outMc, inMeta.getOutputInfo(), inMeta.getInputInfo()));

    // put the output object into the symbol table
    ec.setVariable(output.getName(), result);
}
Also used : IndexRange(org.apache.sysml.runtime.util.IndexRange) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 72 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetSparkEagerRDDCaching.

// /////
// REWRITE set spark eager rdd caching
// /
/**
 * Selects the variables read by the root parfor that should be eagerly cached
 * as Spark RDDs and registers them on the parfor program block.
 * <p>
 * The rewrite only applies in Spark execution mode, for a parfor with CP exec
 * type, and when {@code _N > 1}. A read variable qualifies if it is a matrix
 * object with an RDD handle, is not a repartition variable, has checkpoint RDD
 * children, and its exact-sparsity size estimate exceeds {@code _lm / n.getK()}.
 *
 * @param n    optimizer tree node of the root parfor
 * @param vars variable map used to resolve the candidate variable names
 */
protected void rewriteSetSparkEagerRDDCaching(OptNode n, LocalVariableMap vars) {
    // resolve statement block and program block mapped to the root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
    ParForProgramBlock progBlock = (ParForProgramBlock) mapped[1];
    ArrayList<String> eagerVars = new ArrayList<>();

    // preconditions: spark exec mode, local parfor, at least 2 iterations
    if (OptimizerUtils.isSparkExecutionMode()
            && n.getExecType() == ExecType.CP
            && _N > 1) {
        Set<String> readVars = stmtBlock.variablesRead().getVariableNames();
        Collection<String> repartVars = progBlock.getSparkRepartitionVariables();
        for (String name : readVars) {
            Data value = vars.get(name);
            if (!(value instanceof MatrixObject)) {
                // null or non-matrix variable
                continue;
            }
            MatrixObject matrix = (MatrixObject) value;
            RDDObject handle = matrix.getRDDHandle();
            if (handle == null) {
                continue;
            }
            MatrixCharacteristics dims = matrix.getMatrixCharacteristics();
            boolean notRepartitionVar = repartVars == null || !repartVars.contains(name);
            // not a repartition var, is cached rdd, is out-of-core dataset
            if (notRepartitionVar
                    && handle.rHasCheckpointRDDChilds()
                    && _lm / n.getK() < OptimizerUtils.estimateSizeExactSparsity(dims)) {
                eagerVars.add(name);
            }
        }
        // apply rewrite to parfor pb
        if (!eagerVars.isEmpty()) {
            progBlock.setSparkEagerCacheVariables(eagerVars);
        }
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set spark eager rdd caching' - result=" + eagerVars.size() + " (" + ProgramConverter.serializeStringCollection(eagerVars) + ")");
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ArrayList(java.util.ArrayList) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject)

Example 73 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class DataPartitioner method createPartitionedMatrixObject.

/**
 * Creates a partitioned matrix object based on the given input matrix object,
 * according to the specified split format. The input matrix can be in-memory
 * or still on HDFS and the partitioned output matrix is written to HDFS. The
 * created matrix object can be used transparently for obtaining the full matrix
 * or reading 1 or multiple partitions based on given index ranges.
 * <p>
 * Unless {@code force} is set, the method may return the input unchanged (both
 * dimensions below {@code Hop.CPThreshold}) or switch this partitioner's format
 * from row-/column-wise to the corresponding block-wise format. Note that this
 * updates the {@code _format} field of the partitioner itself.
 *
 * @param in input matrix object
 * @param out output matrix object; its file name is the partitioning target and
 *        any existing file at that location is deleted first
 * @param force if false, try to optimize
 * @return partitioned matrix object ({@code out}), or {@code in} if no
 *         partitioning was performed
 * @throws DMLRuntimeException if the existing output file cannot be deleted
 */
public MatrixObject createPartitionedMatrixObject(MatrixObject in, MatrixObject out, boolean force) {
    // check for naive partitioning
    if (_format == PDataPartitionFormat.NONE)
        return in;
    // analyze input matrix object
    MetaDataFormat meta = (MetaDataFormat) in.getMetaData();
    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
    InputInfo ii = meta.getInputInfo();
    OutputInfo oi = meta.getOutputInfo();
    long rows = mc.getRows();
    long cols = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    long nonZeros = mc.getNonZeros();
    // multiply in double so that very large dimensions cannot overflow the long product
    double sparsity = mc.dimsKnown(true) ? ((double) nonZeros) / ((double) rows * cols) : 1.0;
    // try to optimize, if format not forced
    if (!force) {
        // check lower bound of useful data partitioning
        // (or matrix already fits in mem)
        if (rows < Hop.CPThreshold && cols < Hop.CPThreshold) {
            return in;
        }
        // check for changing to blockwise representations
        if (_format == PDataPartitionFormat.ROW_WISE && cols < Hop.CPThreshold) {
            LOG.debug("Changing format from " + PDataPartitionFormat.ROW_WISE + " to " + PDataPartitionFormat.ROW_BLOCK_WISE + ".");
            _format = PDataPartitionFormat.ROW_BLOCK_WISE;
        }
        if (_format == PDataPartitionFormat.COLUMN_WISE && rows < Hop.CPThreshold) {
            // log the format that is actually applied below (COLUMN_BLOCK_WISE)
            LOG.debug("Changing format from " + PDataPartitionFormat.COLUMN_WISE + " to " + PDataPartitionFormat.COLUMN_BLOCK_WISE + ".");
            _format = PDataPartitionFormat.COLUMN_BLOCK_WISE;
        }
    }
    // check changing to binarycell in case of sparse cols (robustness)
    boolean convertBlock2Cell = false;
    if (ii == InputInfo.BinaryBlockInputInfo && _allowBinarycell && _format == PDataPartitionFormat.COLUMN_WISE && sparsity < SPARSITY_CELL_THRESHOLD) {
        LOG.debug("Changing partition outputinfo from binaryblock to binarycell due to sparsity=" + sparsity);
        oi = OutputInfo.BinaryCellOutputInfo;
        convertBlock2Cell = true;
    }
    // prepare filenames and cleanup if required
    String fnameNew = out.getFileName();
    try {
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // core partitioning (depending on subclass); reads with the original input info
    partitionMatrix(in, fnameNew, ii, oi, rows, cols, brlen, bclen);
    // create output matrix object
    out.setPartitioned(_format, _n);
    MatrixCharacteristics mcNew = new MatrixCharacteristics(rows, cols, brlen, bclen);
    mcNew.setNonZeros(nonZeros);
    // the partitions were written as binarycell, so they must be read back as such
    if (convertBlock2Cell)
        ii = InputInfo.BinaryCellInputInfo;
    MetaDataFormat metaNew = new MetaDataFormat(mcNew, oi, ii);
    out.setMetaData(metaNew);
    return out;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 74 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class DataPartitionerRemoteMapper method configure.

/**
 * Instantiates the format-specific partitioning mapper from the settings stored
 * in the job configuration.
 * <p>
 * The mapper is chosen by the partitioning input info (textcell, binarycell or
 * binaryblock); for binaryblock input the output info additionally selects
 * between a binaryblock and a binarycell writer.
 *
 * @param job job configuration carrying matrix size, input/output info,
 *        partition format, partition size and the index flag
 * @throws RuntimeException if the input info is unknown (including {@code null})
 *         or the binaryblock input is combined with an unsupported output info
 */
@Override
public void configure(JobConf job) {
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    InputInfo ii = MRJobConfiguration.getPartitioningInputInfo(job);
    OutputInfo oi = MRJobConfiguration.getPartitioningOutputInfo(job);
    PDataPartitionFormat pdf = MRJobConfiguration.getPartitioningFormat(job);
    int n = MRJobConfiguration.getPartitioningSizeN(job);
    boolean keepIndexes = MRJobConfiguration.getPartitioningIndexFlag(job);
    if (ii == InputInfo.TextCellInputInfo) {
        _mapper = new DataPartitionerMapperTextcell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryCellInputInfo) {
        _mapper = new DataPartitionerMapperBinarycell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryBlockInputInfo) {
        if (oi == OutputInfo.BinaryBlockOutputInfo) {
            _mapper = new DataPartitionerMapperBinaryblock(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            // fused parfor: program blocks are present in the job configuration
            boolean outputEmpty = MRJobConfiguration.getProgramBlocks(job) != null;
            _mapper = new DataPartitionerMapperBinaryblock2Binarycell(job, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes, outputEmpty);
        } else {
            throw new RuntimeException("Partitioning from '" + ii + "' to '" + oi + "' not supported");
        }
    } else {
        // concatenate instead of calling toString(): a null input info must yield
        // this diagnostic rather than a NullPointerException
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + ii);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 75 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class QuaternaryInstruction method computeMatrixCharacteristics.

/**
 * Derives the output matrix characteristics of a quaternary operation from the
 * characteristics of its inputs and writes them into {@code dimOut}.
 * <p>
 * The block sizes are always taken from {@code mc1}. If none of the weight
 * types of the operator is set, {@code dimOut} is left untouched.
 *
 * @param mc1    characteristics of the first (main) input
 * @param mc2    characteristics of the second input
 * @param mc3    characteristics of the third input
 * @param dimOut characteristics object that receives the output dimensions
 */
public void computeMatrixCharacteristics(MatrixCharacteristics mc1, MatrixCharacteristics mc2, MatrixCharacteristics mc3, MatrixCharacteristics dimOut) {
    final QuaternaryOperator op = (QuaternaryOperator) optr;

    // wsloss/wcemm: output size independent of chain type (scalar)
    if (op.wtype1 != null || op.wtype4 != null) {
        dimOut.set(1, 1, mc1.getRowsPerBlock(), mc1.getColsPerBlock());
        return;
    }

    // wsigmoid/wumm: output size determined by main input
    if (op.wtype2 != null || op.wtype5 != null) {
        dimOut.set(mc1.getRows(), mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
        return;
    }

    // wdivmm
    if (op.wtype3 != null) {
        // note: cannot directly consume mc2 or mc3 for redwdivmm because rep instruction changed
        // the relevant dimensions; as a workaround the original dims are passed via nnz
        final boolean mapSide = _cacheU && _cacheV;
        final MatrixCharacteristics rankSource = op.wtype3.isLeft() ? mc3 : mc2;
        final long rank;
        if (mapSide) {
            rank = rankSource.getCols();
        } else {
            rank = rankSource.getNonZeros();
        }
        MatrixCharacteristics outDims = op.wtype3.computeOutputCharacteristics(mc1.getRows(), mc1.getCols(), rank);
        dimOut.set(outDims.getRows(), outDims.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
    }
}
Also used : QuaternaryOperator(org.apache.sysml.runtime.matrix.operators.QuaternaryOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)296 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)102 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)89 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)70 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)50 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)47 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)45 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)42 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)37 CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)37 IOException (java.io.IOException)30 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)27 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)22 ArrayList (java.util.ArrayList)19 ValueType (org.apache.sysml.parser.Expression.ValueType)19 Path (org.apache.hadoop.fs.Path)17 LongWritable (org.apache.hadoop.io.LongWritable)16 Test (org.junit.Test)15 Text (org.apache.hadoop.io.Text)14