Search in sources :

Example 66 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class CompressedMatrixBlock method chainMatrixMultOperations.

/**
 * Matrix-multiplication chain over this (possibly compressed) block.
 *
 * <p>If the block is not compressed, the superclass implementation is used; if it
 * consists of a single uncompressed column group, the call is forwarded to that
 * group's data block. Otherwise the chain is evaluated on the column groups:
 * a right multiplication by {@code v} into an {@code rlen x 1} intermediate, an
 * optional in-place multiply of that intermediate with {@code w} (only for
 * {@code ChainType.XtwXv}), and a transposed left multiplication into {@code out}.
 *
 * @param v     vector operand of the right multiplication
 * @param w     weights operand, only read when {@code ctype} is {@code XtwXv}
 * @param out   result block to reuse, or {@code null} to allocate a new one
 * @param ctype chain type
 * @param k     degree of parallelism passed through to the multiplications
 * @return the {@code clen x 1} result block
 */
@Override
public MatrixBlock chainMatrixMultOperations(MatrixBlock v, MatrixBlock w, MatrixBlock out, ChainType ctype, int k) {
    // not compressed: the generic superclass implementation applies
    if (!isCompressed()) {
        return super.chainMatrixMultOperations(v, w, out, ctype, k);
    }
    // exactly one uncompressed column group: hand over to its underlying data
    if (isSingleUncompressedGroup()) {
        ColGroupUncompressed ucGroup = (ColGroupUncompressed) _colGroups.get(0);
        return ucGroup.getData().chainMatrixMultOperations(v, w, out, ctype, k);
    }
    // a timer is only created when debug output is requested
    Timing time = null;
    if (LOG.isDebugEnabled()) {
        time = new Timing(true);
    }
    // set up the clen x 1 result, recycling the caller's block when one is given
    if (out == null) {
        out = new MatrixBlock(clen, 1, false);
    } else {
        out.reset(clen, 1, false);
    }
    // nothing to compute for an empty block
    if (isEmptyBlock(false)) {
        return out;
    }
    // step 1: right multiplication by v into an rlen x 1 intermediate
    MatrixBlock intermediate = new MatrixBlock(rlen, 1, false);
    rightMultByVector(v, intermediate, k);
    // step 2 (XtwXv only): multiply the intermediate with w in place
    if (ctype == ChainType.XtwXv) {
        LibMatrixBincell.bincellOpInPlace(intermediate, w, new BinaryOperator(Multiply.getMultiplyFnObject()));
    }
    // step 3: transposed left multiplication of the intermediate into out
    leftMultByVectorTranspose(_colGroups, intermediate, out, true, k);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Compressed MMChain k=" + k + " in " + time.stop());
    }
    return out;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) AggregateBinaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator) BinaryOperator(org.apache.sysml.runtime.matrix.operators.BinaryOperator)

Example 67 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class CompressedMatrixBlock method compress.

/**
 * Compress block.
 *
 * <p>The compression runs in phases: (1) classify every column as compressible or
 * not from sample-based size estimates, (2) partition the compressible columns
 * into co-coded groups, (3) compress those groups, (4) best-effort dictionary
 * sharing across DDC1 single-column groups, and finally store all columns not
 * covered by a compressed group as one uncompressed column group. On success the
 * column groups are stored in this object, the uncompressed data is cleaned up,
 * and {@code this} is returned.
 *
 * <p>Compression is aborted, returning {@code new MatrixBlock().copyShallow(this)},
 * if no column is classified as compressible or if the final compression ratio is
 * below 1. Phase timings are only recorded in the statistics when debug logging is
 * enabled.
 *
 * @param k  number of threads
 * @return compressed matrix block or original block if incompressible
 * @throws DMLRuntimeException if this block is already compressed
 */
public MatrixBlock compress(int k) {
    // check for redundant compression
    if (isCompressed()) {
        throw new DMLRuntimeException("Redundant compression, block already compressed.");
    }
    Timing time = new Timing(true);
    _stats = new CompressionStatistics();
    // SAMPLE-BASED DECISIONS:
    // Decisions such as testing if a column is amenable to bitmap
    // compression or evaluating co-coding potentials are made based on a
    // subset of the rows. For large datasets, sampling might take a
    // significant amount of time. So, we generate only one sample and use
    // it for the entire compression process.
    // prepare basic meta data and deep copy / transpose input
    final int numRows = getNumRows();
    final int numCols = getNumColumns();
    final boolean sparse = isInSparseFormat();
    // rawblock is a copy of this block, transposed when TRANSPOSE_INPUT is set
    MatrixBlock rawblock = !TRANSPOSE_INPUT ? new MatrixBlock(this) : LibMatrixReorg.transpose(this, new MatrixBlock(numCols, numRows, sparse), k);
    // construct sample-based size estimator
    CompressedSizeEstimator bitmapSizeEstimator = SizeEstimatorFactory.getSizeEstimator(rawblock, numRows);
    // PHASE 1: Classify columns by compression type
    // We start by determining which columns are amenable to compression
    List<Integer> colsC = new ArrayList<>();
    List<Integer> colsUC = new ArrayList<>();
    HashMap<Integer, Double> compRatios = new HashMap<>();
    // Classify columns according to ratio (size uncompressed / size compressed),
    // where a column is compressible if ratio > 1.
    // (k > 1 selects the overload that additionally takes the thread count)
    CompressedSizeInfo[] sizeInfos = (k > 1) ? computeCompressedSizeInfos(bitmapSizeEstimator, numCols, k) : computeCompressedSizeInfos(bitmapSizeEstimator, numCols);
    // running sum of estimated non-zeros over the columns classified as uncompressed
    long nnzUC = 0;
    for (int col = 0; col < numCols; col++) {
        double uncompSize = getUncompressedSize(numRows, 1, OptimizerUtils.getSparsity(numRows, 1, sizeInfos[col].getEstNnz()));
        double compRatio = uncompSize / sizeInfos[col].getMinSize();
        if (compRatio > 1) {
            colsC.add(col);
            compRatios.put(col, compRatio);
        } else {
            colsUC.add(col);
            nnzUC += sizeInfos[col].getEstNnz();
        }
    }
    // correction of column classification (reevaluate dense estimates if necessary)
    boolean sparseUC = MatrixBlock.evalSparseFormatInMemory(numRows, colsUC.size(), nnzUC);
    if (!sparseUC && !colsUC.isEmpty()) {
        for (int i = 0; i < colsUC.size(); i++) {
            int col = colsUC.get(i);
            // re-estimate the uncompressed size with sparsity 1.0
            double uncompSize = getUncompressedSize(numRows, 1, 1.0);
            double compRatio = uncompSize / sizeInfos[col].getMinSize();
            if (compRatio > 1) {
                colsC.add(col);
                // i is a primitive int, so this is List.remove(int index); the i--
                // below revisits the element that shifted into position i
                colsUC.remove(i);
                i--;
                compRatios.put(col, compRatio);
                nnzUC -= sizeInfos[col].getEstNnz();
            }
        }
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("C: " + Arrays.toString(colsC.toArray(new Integer[0])));
        LOG.trace("-- compression ratios: " + Arrays.toString(colsC.stream().map(c -> compRatios.get(c)).toArray()));
        LOG.trace("UC: " + Arrays.toString(colsUC.toArray(new Integer[0])));
        // compRatios only has entries for columns in colsC, so these lookups yield null
        LOG.trace("-- compression ratios: " + Arrays.toString(colsUC.stream().map(c -> compRatios.get(c)).toArray()));
    }
    if (LOG.isDebugEnabled()) {
        _stats.timePhase1 = time.stop();
        LOG.debug("Compression statistics:");
        LOG.debug("--compression phase 1: " + _stats.timePhase1);
    }
    // abort early: no column was classified as compressible
    if (colsC.isEmpty()) {
        if (LOG.isDebugEnabled())
            LOG.debug("Abort block compression because all columns are incompressible.");
        return new MatrixBlock().copyShallow(this);
    }
    // PHASE 2: Grouping columns
    // Divide the bitmap columns into column groups.
    List<int[]> bitmapColGrps = PlanningCoCoder.findCocodesByPartitioning(bitmapSizeEstimator, colsC, sizeInfos, numRows, k);
    if (LOG.isDebugEnabled()) {
        _stats.timePhase2 = time.stop();
        LOG.debug("--compression phase 2: " + _stats.timePhase2);
    }
    // optionally record the estimated total size (compressed groups + uncompressed rest)
    if (INVESTIGATE_ESTIMATES) {
        double est = 0;
        for (int[] groupIndices : bitmapColGrps) est += bitmapSizeEstimator.estimateCompressedColGroupSize(groupIndices).getMinSize();
        est += MatrixBlock.estimateSizeInMemory(numRows, colsUC.size(), OptimizerUtils.getSparsity(numRows, colsUC.size(), nnzUC));
        _stats.estSize = est;
    }
    // PHASE 3: Compress and correct sample-based decisions
    // (k > 1 selects the overload that additionally takes the thread count)
    ColGroup[] colGroups = (k > 1) ? compressColGroups(rawblock, bitmapSizeEstimator, compRatios, numRows, bitmapColGrps, colsUC.isEmpty(), k) : compressColGroups(rawblock, bitmapSizeEstimator, compRatios, numRows, bitmapColGrps, colsUC.isEmpty());
    allocateColGroupList();
    // start from all column indices and strike out those covered by a produced group
    HashSet<Integer> remainingCols = seq(0, numCols - 1, 1);
    for (int j = 0; j < colGroups.length; j++) {
        // null entries are skipped; their columns stay in remainingCols
        if (colGroups[j] != null) {
            for (int col : colGroups[j].getColIndices()) remainingCols.remove(col);
            _colGroups.add(colGroups[j]);
        }
    }
    if (LOG.isDebugEnabled()) {
        _stats.timePhase3 = time.stop();
        LOG.debug("--compression phase 3: " + _stats.timePhase3);
    }
    // PHASE 4: Best-effort dictionary sharing for DDC1 single-col groups
    double[] dict = createSharedDDC1Dictionary(_colGroups);
    if (dict != null) {
        applySharedDDC1Dictionary(_colGroups, dict);
        _sharedDDC1Dict = true;
    }
    if (LOG.isDebugEnabled()) {
        _stats.timePhase4 = time.stop();
        LOG.debug("--compression phase 4: " + _stats.timePhase4);
    }
    // The remaining columns are stored uncompressed as one big column group
    if (!remainingCols.isEmpty()) {
        ArrayList<Integer> list = new ArrayList<>(remainingCols);
        ColGroupUncompressed ucgroup = new ColGroupUncompressed(list, rawblock);
        _colGroups.add(ucgroup);
    }
    // achieved ratio: estimated in-memory size of this block over estimated compressed size
    _stats.size = estimateCompressedSizeInMemory();
    _stats.ratio = estimateSizeInMemory() / _stats.size;
    if (_stats.ratio < 1) {
        if (LOG.isDebugEnabled())
            LOG.debug("Abort block compression because compression ratio is less than 1.");
        // NOTE(review): _colGroups has already been populated at this point and is not
        // reset before returning -- confirm callers discard this object on abort
        return new MatrixBlock().copyShallow(this);
    }
    // final cleanup (discard uncompressed block)
    rawblock.cleanupBlock(true, true);
    this.cleanupBlock(true, true);
    if (LOG.isDebugEnabled()) {
        _stats.timePhase5 = time.stop();
        // counts[0..4] are printed as group counts and counts[5..9] as group sizes,
        // reordered to match the (OLE,RLE,DDC1,DDC2,UC) labels below
        int[] counts = getColGroupCounts(_colGroups);
        LOG.debug("--compression phase 5: " + _stats.timePhase5);
        LOG.debug("--num col groups: " + _colGroups.size());
        LOG.debug("--col groups types (OLE,RLE,DDC1,DDC2,UC): " + counts[2] + "," + counts[1] + "," + counts[3] + "," + counts[4] + "," + counts[0]);
        LOG.debug("--col groups sizes (OLE,RLE,DDC1,DDC2,UC): " + counts[7] + "," + counts[6] + "," + counts[8] + "," + counts[9] + "," + counts[5]);
        LOG.debug("--compressed size: " + _stats.size);
        LOG.debug("--compression ratio: " + _stats.ratio);
    }
    return this;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CompressedSizeInfo(org.apache.sysml.runtime.compress.estim.CompressedSizeInfo) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CompressedSizeEstimator(org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 68 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class CompressedMatrixBlock method transposeSelfMatrixMultOperations.

/**
 * Transpose-self matrix multiplication over this (possibly compressed) block.
 *
 * <p>If the block is not compressed, the superclass implementation is used; if it
 * consists of a single uncompressed column group, the call is forwarded to that
 * group's data block. Otherwise the column groups are split into at most
 * {@code 2 * k} contiguous ranges that are processed by
 * {@code MatrixMultTransposeTask}s on a thread pool, after which the upper
 * triangle of the result is copied to the lower triangle.
 *
 * @param out    result block to reuse, or {@code null} to allocate a new one
 * @param tstype transpose-self type; only {@code MMTSJType.LEFT} is supported
 * @param k      degree of parallelism
 * @return the {@code clen x clen} result block
 * @throws DMLRuntimeException if {@code tstype} is not {@code LEFT}, or if any
 *         task fails (the original exception is attached as the cause)
 */
@Override
public MatrixBlock transposeSelfMatrixMultOperations(MatrixBlock out, MMTSJType tstype, int k) {
    // call uncompressed matrix mult if necessary
    if (!isCompressed()) {
        return super.transposeSelfMatrixMultOperations(out, tstype, k);
    }
    // multi-threaded tsmm of single uncompressed colgroup
    if (isSingleUncompressedGroup()) {
        return ((ColGroupUncompressed) _colGroups.get(0)).getData().transposeSelfMatrixMultOperations(out, tstype, k);
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    // check for transpose type (right not supported yet); plain concatenation
    // keeps a null type from masking the intended error with an NPE
    if (tstype != MMTSJType.LEFT) {
        throw new DMLRuntimeException("Invalid MMTSJ type '" + tstype + "'.");
    }
    // create output matrix block
    if (out == null) {
        out = new MatrixBlock(clen, clen, false);
    } else {
        out.reset(clen, clen, false);
    }
    out.allocateDenseBlock();
    if (!isEmptyBlock(false)) {
        // compute matrix mult
        ExecutorService pool = null;
        try {
            pool = CommonThreadPool.get(k);
            ArrayList<MatrixMultTransposeTask> tasks = new ArrayList<>();
            int numgrp = _colGroups.size();
            int blklen = (int) (Math.ceil((double) numgrp / (2 * k)));
            // the ranges index column groups, so they are bounded by the number of
            // groups (not the number of columns) to avoid empty or inverted ranges
            for (int i = 0; i < 2 * k && i * blklen < numgrp; i++) {
                tasks.add(new MatrixMultTransposeTask(_colGroups, out, i * blklen, Math.min((i + 1) * blklen, numgrp)));
            }
            List<Future<Object>> ret = pool.invokeAll(tasks);
            // check for errors
            for (Future<Object> tret : ret) {
                tret.get();
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            // release the pool on the failure path as well
            if (pool != null) {
                pool.shutdown();
            }
        }
        // post-processing
        out.setNonZeros(LinearAlgebraUtils.copyUpperToLowerTriangle(out));
    }
    // time is null if debug logging was disabled when the method started
    if (time != null && LOG.isDebugEnabled()) {
        LOG.debug("Compressed TSMM k=" + k + " in " + time.stop());
    }
    return out;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 69 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class ParWorker method executeRangeTask.

/**
 * Executes every iteration of a range task on this worker.
 *
 * <p>The task's iteration list is read as the triple (from, to, increment). For
 * each value in that range the loop variable is set in the execution context and
 * all child program blocks are executed. Iteration and task counters are updated,
 * and per-iteration / per-task statistics are reported when monitoring is enabled.
 *
 * @param task range task to execute
 */
private void executeRangeTask(Task task) {
    // timers are only allocated when monitoring is switched on
    Timing iterTimer = null;
    Timing taskTimer = null;
    if (_monitor) {
        iterTimer = new Timing(true);
        taskTimer = new Timing(true);
    }
    // unpack the range description: positions 0, 1, 2 hold from, to, increment
    List<IntObject> range = task.getIterations();
    String varName = task.getVarName();
    final long first = range.get(0).getLongValue();
    final long last = range.get(1).getLongValue();
    final long step = range.get(2).getLongValue();
    long i = first;
    while (i <= last) {
        // bind the loop variable to the current index
        _ec.setVariable(varName, new IntObject(i));
        // run the parfor body
        for (ProgramBlock pb : _childBlocks) {
            pb.execute(_ec);
        }
        _numIters++;
        if (_monitor) {
            StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_ITER_T, iterTimer.stop());
        }
        i += step;
    }
    _numTasks++;
    // report task-level statistics
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASKSIZE, task.size());
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASK_T, taskTimer.stop());
    }
}
Also used : IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) ProgramBlock(org.apache.sysml.runtime.controlprogram.ProgramBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 70 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class ParForProgramBlock method executeRemoteMRParFor.

/**
 * Executes this parfor as a single remote MR job.
 *
 * <p>Steps: (0) optionally check and recompile MR instructions to CP, (1) serialize
 * the parfor body, (2) create the tasks and write them to the task file, (3) export
 * matrices to HDFS, submit the MR job and wait for it, (4) consolidate the results
 * returned by the job into the execution context. Timing and count statistics are
 * reported after each step when monitoring is enabled.
 *
 * @param ec      execution context
 * @param itervar iteration variable (not read by this method)
 * @param from    lower bound of the iteration range
 * @param to      upper bound of the iteration range
 * @param incr    increment of the iteration range
 * @throws IOException declared by this method; the visible body does not throw it directly
 */
private void executeRemoteMRParFor(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    // a timer is only created when monitoring is switched on
    Timing time = null;
    if (_monitor) {
        time = new Timing(true);
    }

    // Step 0) check and compile to CP (if forced remote parfor)
    boolean forcedRecompile = false;
    if (FORCE_CP_ON_REMOTE_MR) {
        boolean unoptimized = (_optMode == POptMode.NONE);
        boolean constrainedRemoteMR = (_optMode == POptMode.CONSTRAINED && _execMode == PExecMode.REMOTE_MR);
        if (unoptimized || constrainedRemoteMR) {
            // tid = 0 because it is replaced in the remote parworker
            forcedRecompile = checkMRAndRecompileToCP(0);
        }
    }

    // Step 1) init parallel workers (serialize PBs)
    // NOTES: each mapper changes filenames with regard to its ID as a single job is
    // submitted; the serialized string cannot be reused since variables are
    // serialized as well.
    String program = ProgramConverter.serializeParForBody(new ParForBody(_childBlocks, _resultVars, ec));
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, time.stop());
    }

    // Step 2) create tasks
    TaskPartitioner partitioner = createTaskPartitioner(from, to, incr);
    String taskFile = constructTaskFileName();
    String resultFile = constructResultFileName();
    long numIterations = partitioner.getNumIterations();
    int maxDigits = (int) Math.log10(to.getLongValue()) + 1;
    long numCreatedTasks;
    if (!USE_STREAMING_TASK_CREATION) {
        // sequentially create tasks and write to disk
        List<Task> tasks = partitioner.createTasks();
        numCreatedTasks = tasks.size();
        taskFile = writeTasksToFile(taskFile, tasks, maxDigits);
    } else {
        // put tasks into queue and start writing to taskFile
        LocalTaskQueue<Task> queue = new LocalTaskQueue<>();
        numCreatedTasks = partitioner.createTasks(queue);
        taskFile = writeTasksToFile(taskFile, queue, maxDigits);
    }
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, time.stop());
    }

    // write matrices to HDFS
    exportMatricesToHDFS(ec);

    // Step 3) submit MR job (wait for finished work)
    MatrixObject colocatedDPMatrixObj = null;
    if (_colocatedDPMatrix != null) {
        colocatedDPMatrixObj = ec.getMatrixObject(_colocatedDPMatrix);
    }
    RemoteParForJobReturn ret = RemoteParForMR.runJob(_ID, program, taskFile, resultFile, colocatedDPMatrixObj, _enableCPCaching, _numThreads, WRITE_REPLICATION_FACTOR, MAX_RETRYS_ON_ERROR, getMinMemory(ec), (ALLOW_REUSE_MR_JVMS & _jvmReuse));
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, time.stop());
    }

    // Step 4) collecting results from each parallel worker
    int numExecutedTasks = ret.getNumExecutedTasks();
    int numExecutedIterations = ret.getNumExecutedIterations();
    // consolidate results into global symbol table
    consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, ret.getVariables());
    // undo the forced recompile of step 0
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, time.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) Task(org.apache.sysml.runtime.controlprogram.parfor.Task) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) LocalTaskQueue(org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Aggregations

Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)78 IOException (java.io.IOException)31 ArrayList (java.util.ArrayList)29 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)25 HashMap (java.util.HashMap)24 Connection (org.apache.sysml.api.jmlc.Connection)17 PreparedScript (org.apache.sysml.api.jmlc.PreparedScript)17 ResultVariables (org.apache.sysml.api.jmlc.ResultVariables)17 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)17 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)14 ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock)10 TaskPartitioner (org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)10 ParForBody (org.apache.sysml.runtime.controlprogram.parfor.ParForBody)8 RemoteParForJobReturn (org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn)8 LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap)7 ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock)7 ExecutorService (java.util.concurrent.ExecutorService)6 Future (java.util.concurrent.Future)6 LocalTaskQueue (org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue)6 Task (org.apache.sysml.runtime.controlprogram.parfor.Task)6