Search in sources :

Example 1 with RandomMatrixGenerator

use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by apache.

In class RandSPInstruction, method generateRandData.

/**
 * Executes the rand operation for this Spark instruction and binds the result to the output variable.
 *
 * <p>If {@code isMemAvail} accepts the requested dimensions and the runtime platform is not forced to
 * SPARK, the matrix is generated as a single in-memory block and the method returns early. Otherwise
 * one seed per matrix block is produced (kept in memory below {@code INMEMORY_NUMBLOCKS_THRESHOLD}
 * blocks, written to a seed file otherwise) and the blocks are generated by mapping over the seed RDD.
 *
 * @param sec Spark execution context providing the scalar inputs and receiving the output
 * @throws DMLRuntimeException if the seed file cannot be created or written
 */
private void generateRandData(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    long lcols = sec.getScalarInput(cols).getLongValue();
    // step 1: generate pseudo-random seed (because not specified)
    // seed per invocation
    long lSeed = seed;
    if (lSeed == DataGenOp.UNSPECIFIED_SEED) {
        lSeed = DataGenOp.generateRandomSeed();
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("Process RandSPInstruction rand with seed = " + lSeed + ".");
    }
    // step 2: potential in-memory rand operations if applicable
    if (isMemAvail(lrows, lcols, sparsity, minValue, maxValue) && DMLScript.rtplatform != RUNTIME_PLATFORM.SPARK) {
        // NOTE(review): the int casts presumably rely on isMemAvail rejecting dimensions beyond int range - confirm
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        MatrixBlock mb = MatrixBlock.randOperations(rgen, lSeed);
        sec.setMatrixOutput(output.getName(), mb, getExtendedOpcode());
        Statistics.decrementNoOfExecutedSPInst();
        return;
    }
    // step 3: seed generation
    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(lSeed);
    // overestimate for on disk, ensures hdfs block per partition
    double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(lrows, lcols, rowsInBlock, colsInBlock, sparsity);
    double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
    MatrixCharacteristics tmp = new MatrixCharacteristics(lrows, lcols, rowsInBlock, colsInBlock);
    long numBlocks = tmp.getNumBlocks();
    long numColBlocks = tmp.getNumColBlocks();
    // for load balancing: degree of parallelism such that ~128MB per partition
    // (identical for both seed construction paths, hence computed once)
    int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
    JavaPairRDD<MatrixIndexes, Long> seedsRDD;
    if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
        // a) in-memory seed rdd construction
        ArrayList<Tuple2<MatrixIndexes, Long>> seeds = new ArrayList<>();
        for (long i = 0; i < numBlocks; i++) {
            // 1-based block indexes in row-major order
            long r = 1 + i / numColBlocks;
            long c = 1 + i % numColBlocks;
            MatrixIndexes indx = new MatrixIndexes(r, c);
            Long seedForBlock = bigrand.nextLong();
            seeds.add(new Tuple2<>(indx, seedForBlock));
        }
        // create seeds rdd
        seedsRDD = sec.getSparkContext().parallelizePairs(seeds, numPartitions);
    } else {
        // b) file-based seed rdd construction (for robustness wrt large number of blocks)
        Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        PrintWriter pw = null;
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            pw = new PrintWriter(fs.create(path));
            StringBuilder sb = new StringBuilder();
            for (long i = 0; i < numBlocks; i++) {
                // one line per block: rowIndex,colIndex,seed
                sb.append(1 + i / numColBlocks);
                sb.append(',');
                sb.append(1 + i % numColBlocks);
                sb.append(',');
                sb.append(bigrand.nextLong());
                pw.println(sb.toString());
                sb.setLength(0);
            }
            // PrintWriter never throws on write failures; without this check a truncated
            // seed file would go unnoticed and blocks would silently be missing.
            if (pw.checkError()) {
                throw new DMLRuntimeException("Failed to write rand seed file " + path.toString() + ".");
            }
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            IOUtilFunctions.closeSilently(pw);
        }
        // create seeds rdd
        seedsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).mapToPair(new ExtractSeedTuple());
    }
    // step 4: execute rand instruction over seed input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = seedsRDD.mapToPair(new GenerateRandomBlock(lrows, lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdf, pdfParams));
    // step 5: output handling
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown(true)) {
        // note: we cannot compute the nnz from sparsity because this would not reflect the
        // actual number of non-zeros, except for extreme values of sparsity equals 0 or 1.
        long lnnz = (sparsity == 0 || sparsity == 1) ? (long) (sparsity * lrows * lcols) : -1;
        mcOut.set(lrows, lcols, rowsInBlock, colsInBlock, lnnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) Tuple2(scala.Tuple2) FileSystem(org.apache.hadoop.fs.FileSystem) Well1024a(org.apache.commons.math3.random.Well1024a) PrintWriter(java.io.PrintWriter)

Example 2 with RandomMatrixGenerator

use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by apache.

In class DataGenCPInstruction, method processInstruction.

/**
 * Executes this data generation instruction (rand, seq or sample) in the control program and binds
 * the generated block to the output variable.
 *
 * @param ec execution context providing the scalar inputs and receiving the matrix output
 * @throws DMLRuntimeException if sampling without replacement is requested for a sample size larger
 *         than the population, or if the datagen method is not supported by this instruction
 */
@Override
public void processInstruction(ExecutionContext ec) {
    MatrixBlock soresBlock = null;
    // process specific datagen operator
    if (method == DataGenMethod.RAND) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        long lcols = ec.getScalarInput(cols).getLongValue();
        checkValidDimensions(lrows, lcols);
        // generate pseudo-random seed (because not specified)
        // seed per invocation
        long lSeed = seed;
        if (lSeed == DataGenOp.UNSPECIFIED_SEED) {
            lSeed = DataGenOp.generateRandomSeed();
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction rand with seed = " + lSeed + ".");
        }
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        // use the resolved per-invocation seed so that the traced seed is the one actually applied
        soresBlock = MatrixBlock.randOperations(rgen, lSeed, numThreads);
    } else if (method == DataGenMethod.SEQ) {
        double lfrom = ec.getScalarInput(seq_from).getDoubleValue();
        double lto = ec.getScalarInput(seq_to).getDoubleValue();
        double lincr = ec.getScalarInput(seq_incr).getDoubleValue();
        // handle default 1 to -1 for special case of from>to
        lincr = LibMatrixDatagen.updateSeqIncr(lfrom, lto, lincr);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction seq with seqFrom=" + lfrom + ", seqTo=" + lto + ", seqIncr=" + lincr);
        }
        soresBlock = MatrixBlock.seqOperations(lfrom, lto, lincr);
    } else if (method == DataGenMethod.SAMPLE) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        // the population size is carried in maxValue
        long range = UtilFunctions.toLong(maxValue);
        checkValidDimensions(lrows, 1);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction sample with range=" + range + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
        }
        if (range < lrows && !replace) {
            throw new DMLRuntimeException("Sample (size=" + lrows + ") larger than population (size=" + range + ") can only be generated with replacement.");
        }
        soresBlock = MatrixBlock.sampleOperations(range, (int) lrows, replace, seed);
    } else {
        // fail with a descriptive error instead of a NullPointerException further below
        throw new DMLRuntimeException("Unsupported data generation method in DataGenCPInstruction: " + method);
    }
    // guarded sparse block representation change
    if (soresBlock.getInMemorySize() < OptimizerUtils.SAFE_REP_CHANGE_THRES) {
        soresBlock.examSparsity();
    }
    // release created output
    ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 3 with RandomMatrixGenerator

use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by apache.

In class RunMRJobs, method executeInMemoryDataGenOperations.

/**
 * Runs the given rand/seq data generation instructions directly in memory and stores each generated
 * block in every output matrix whose result index matches the instruction's output.
 *
 * @param inst           job instruction supplying the result indexes and output infos
 * @param randInst       encoded data generation instructions to parse and execute
 * @param outputMatrices output matrix objects, positionally aligned with the result indexes
 * @return a successful job return carrying the characteristics of the written outputs
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] outStats = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] parsedInsts = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIndexes = inst.getIv_resultIndices();
    for (DataGenMRInstruction current : parsedInsts) {
        if (current instanceof RandInstruction) {
            // CP Rand block operation
            RandInstruction rand = (RandInstruction) current;
            RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
                rand.getProbabilityDensityFunction(),
                (int) rand.getRows(), (int) rand.getCols(),
                rand.getRowsInBlock(), rand.getColsInBlock(),
                rand.getSparsity(), rand.getMinValue(), rand.getMaxValue(),
                rand.getPdfParams());
            MatrixBlock generated = MatrixBlock.randOperations(generator, rand.getSeed());
            for (int pos = 0; pos < resultIndexes.length; pos++) {
                if (rand.output != resultIndexes[pos]) {
                    continue;
                }
                // hand the block over to the matching output and record its characteristics
                outputMatrices[pos].acquireModify(generated);
                outputMatrices[pos].release();
                outStats[pos] = new MatrixCharacteristics(generated.getNumRows(), generated.getNumColumns(), rand.getRowsInBlock(), rand.getColsInBlock(), generated.getNonZeros());
            }
        } else if (current instanceof SeqInstruction) {
            // CP Seq block operation
            SeqInstruction seq = (SeqInstruction) current;
            MatrixBlock generated = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);
            for (int pos = 0; pos < resultIndexes.length; pos++) {
                if (seq.output != resultIndexes[pos]) {
                    continue;
                }
                // hand the block over to the matching output and record its characteristics
                outputMatrices[pos].acquireModify(generated);
                outputMatrices[pos].release();
                outStats[pos] = new MatrixCharacteristics(generated.getNumRows(), generated.getNumColumns(), seq.getRowsInBlock(), seq.getColsInBlock(), generated.getNonZeros());
            }
        }
    }
    return new JobReturn(outStats, inst.getOutputInfos(), true);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SeqInstruction(org.apache.sysml.runtime.instructions.mr.SeqInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 4 with RandomMatrixGenerator

use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project incubator-systemml by apache.

In class DataGenMapper, method map.

/**
 * Generates one matrix block per data generation instruction from a comma-separated input record
 * and passes each block on to the mapper-specific processing.
 *
 * <p>Record layout as parsed below: for rand
 * {@code blockRow,blockCol,blockRowSize,blockColSize,seed}; for seq
 * {@code blockRow,blockCol,from,to,incr}.
 *
 * @param key         input key (not read by this method)
 * @param valueString input record; valueString has to be Text type
 * @param out         output collector forwarded to {@code specialOperationsForActualMap}
 * @param reporter    reporter, cached and used for the map time counter
 * @throws IOException if block generation fails or the instruction is not a rand or seq instruction
 */
@Override
public void map(Writable key, Writable valueString, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException {
    cachedReporter = reporter;
    long start = System.currentTimeMillis();
    // for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        DataGenMRInstruction genInst = dataGen_instructions.get(i);
        if (genInst.getDataGenMethod() == DataGenMethod.RAND) {
            RandInstruction randInst = (RandInstruction) genInst;
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            int blockRowSize = Integer.parseInt(params[2]);
            int blockColSize = Integer.parseInt(params[3]);
            long seed = Long.parseLong(params[4]);
            double minValue = randInst.getMinValue();
            double maxValue = randInst.getMaxValue();
            double sparsity = randInst.getSparsity();
            // locale-independent lower-casing: the default-locale variant maps 'I' to a dotless i
            // under e.g. a Turkish locale, which would corrupt the pdf name ("UNIFORM")
            String pdf = randInst.getProbabilityDensityFunction().toLowerCase(java.util.Locale.ROOT);
            // rand data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                // the generator covers exactly this block: matrix dims equal block dims
                RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, blockRowSize, blockColSize, blockRowSize, blockColSize, sparsity, minValue, maxValue, randInst.getPdfParams());
                block[i].randOperationsInPlace(rgen, null, seed);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else if (genInst.getDataGenMethod() == DataGenMethod.SEQ) {
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            double from = Double.parseDouble(params[2]);
            double to = Double.parseDouble(params[3]);
            double incr = Double.parseDouble(params[4]);
            // handle default 1 to -1 for special case of from>to
            incr = LibMatrixDatagen.updateSeqIncr(from, to, incr);
            // sequence data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                block[i].seqOperationsInPlace(from, to, incr);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else {
            throw new IOException("Unknown data generation instruction: " + genInst.toString());
        }
        // put the input in the cache
        cachedValues.reset();
        cachedValues.set(genInst.output, indexes[i], block[i]);
        // special operations for individual mapp type
        specialOperationsForActualMap(i, out, reporter);
    }
    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) IOException(java.io.IOException) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 5 with RandomMatrixGenerator

use of org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator in project systemml by apache.

In class DataGenCPInstruction, method processInstruction.

/**
 * Executes this data generation instruction (rand, seq or sample) in the control program and binds
 * the generated block to the output variable.
 *
 * @param ec execution context providing the scalar inputs and receiving the matrix output
 * @throws DMLRuntimeException if sampling without replacement is requested for a sample size larger
 *         than the population, or if the datagen method is not supported by this instruction
 */
@Override
public void processInstruction(ExecutionContext ec) {
    MatrixBlock soresBlock = null;
    // process specific datagen operator
    if (method == DataGenMethod.RAND) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        long lcols = ec.getScalarInput(cols).getLongValue();
        checkValidDimensions(lrows, lcols);
        // generate pseudo-random seed (because not specified)
        // seed per invocation
        long lSeed = seed;
        if (lSeed == DataGenOp.UNSPECIFIED_SEED) {
            lSeed = DataGenOp.generateRandomSeed();
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction rand with seed = " + lSeed + ".");
        }
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        // use the resolved per-invocation seed so that the traced seed is the one actually applied
        soresBlock = MatrixBlock.randOperations(rgen, lSeed, numThreads);
    } else if (method == DataGenMethod.SEQ) {
        double lfrom = ec.getScalarInput(seq_from).getDoubleValue();
        double lto = ec.getScalarInput(seq_to).getDoubleValue();
        double lincr = ec.getScalarInput(seq_incr).getDoubleValue();
        // handle default 1 to -1 for special case of from>to
        lincr = LibMatrixDatagen.updateSeqIncr(lfrom, lto, lincr);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction seq with seqFrom=" + lfrom + ", seqTo=" + lto + ", seqIncr=" + lincr);
        }
        soresBlock = MatrixBlock.seqOperations(lfrom, lto, lincr);
    } else if (method == DataGenMethod.SAMPLE) {
        long lrows = ec.getScalarInput(rows).getLongValue();
        // the population size is carried in maxValue
        long range = UtilFunctions.toLong(maxValue);
        checkValidDimensions(lrows, 1);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Process DataGenCPInstruction sample with range=" + range + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
        }
        if (range < lrows && !replace) {
            throw new DMLRuntimeException("Sample (size=" + lrows + ") larger than population (size=" + range + ") can only be generated with replacement.");
        }
        soresBlock = MatrixBlock.sampleOperations(range, (int) lrows, replace, seed);
    } else {
        // fail with a descriptive error instead of a NullPointerException further below
        throw new DMLRuntimeException("Unsupported data generation method in DataGenCPInstruction: " + method);
    }
    // guarded sparse block representation change
    if (soresBlock.getInMemorySize() < OptimizerUtils.SAFE_REP_CHANGE_THRES) {
        soresBlock.examSparsity();
    }
    // release created output
    ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

RandomMatrixGenerator (org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator): 8, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 6, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 6, IOException (java.io.IOException): 4, DataGenMRInstruction (org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction): 4, RandInstruction (org.apache.sysml.runtime.instructions.mr.RandInstruction): 4, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 4, PrintWriter (java.io.PrintWriter): 2, ArrayList (java.util.ArrayList): 2, Well1024a (org.apache.commons.math3.random.Well1024a): 2, FileSystem (org.apache.hadoop.fs.FileSystem): 2, Path (org.apache.hadoop.fs.Path): 2, SeqInstruction (org.apache.sysml.runtime.instructions.mr.SeqInstruction): 2, JobReturn (org.apache.sysml.runtime.matrix.JobReturn): 2, MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 2, Tuple2 (scala.Tuple2): 2