
Example 56 with DMLRuntimeException

Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.

Class PmmSPInstruction, method parseInstruction.

public static PmmSPInstruction parseInstruction(String str) {
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = InstructionUtils.getOpCode(str);
    if (opcode.equalsIgnoreCase(PMMJ.OPCODE)) {
        CPOperand in1 = new CPOperand(parts[1]);
        CPOperand in2 = new CPOperand(parts[2]);
        CPOperand nrow = new CPOperand(parts[3]);
        CPOperand out = new CPOperand(parts[4]);
        CacheType type = CacheType.valueOf(parts[5]);
        AggregateOperator agg = new AggregateOperator(0, Plus.getPlusFnObject());
        AggregateBinaryOperator aggbin = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), agg);
        return new PmmSPInstruction(aggbin, in1, in2, out, nrow, type, opcode, str);
    } else {
        throw new DMLRuntimeException("PmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }
}
Also used : AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) AggregateBinaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) CacheType(org.apache.sysml.lops.MapMult.CacheType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
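
The parser above follows the usual SystemML instruction-parsing pattern: split the serialized instruction into operand fields, dispatch on the opcode, and fail fast with a DMLRuntimeException for anything unrecognized. Below is a minimal standalone sketch of that dispatch pattern; the PmmParseSketch class, the ':' delimiter, and the field layout are illustrative assumptions and not SystemML's actual instruction encoding.

// Illustrative sketch only: mirrors the opcode-dispatch pattern of PmmSPInstruction.parseInstruction,
// but uses a plain ':' delimiter and a local ParsedPmm holder instead of SystemML's real encoding.
public class PmmParseSketch {

    static final class ParsedPmm {
        final String in1, in2, nrow, out, cacheType;
        ParsedPmm(String in1, String in2, String nrow, String out, String cacheType) {
            this.in1 = in1; this.in2 = in2; this.nrow = nrow; this.out = out; this.cacheType = cacheType;
        }
    }

    static ParsedPmm parse(String str) {
        String[] parts = str.split(":");   // stand-in for InstructionUtils.getInstructionPartsWithValueType
        String opcode = parts[0];          // stand-in for InstructionUtils.getOpCode
        if (opcode.equalsIgnoreCase("pmm")) {
            // operands 1..5: input1, input2, nrow, output, cache type
            return new ParsedPmm(parts[1], parts[2], parts[3], parts[4], parts[5]);
        }
        // unknown opcodes fail fast, like the DMLRuntimeException in the real parser
        throw new RuntimeException("parse():: Unknown opcode " + opcode);
    }

    public static void main(String[] args) {
        ParsedPmm p = parse("pmm:mVar1:mVar2:nrow1:mVar3:LEFT");
        System.out.println("in1=" + p.in1 + ", out=" + p.out + ", type=" + p.cacheType);
    }
}

As these examples show, DMLRuntimeException is thrown without a throws clause, so it propagates as an unchecked exception to the caller's instruction-parsing error handling.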

Example 57 with DMLRuntimeException

Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.

Class QuantileSortSPInstruction, method parseInstruction.

public static QuantileSortSPInstruction parseInstruction(String str) {
    CPOperand in1 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
    CPOperand in2 = null;
    CPOperand out = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = parts[0];
    if (opcode.equalsIgnoreCase(SortKeys.OPCODE)) {
        if (parts.length == 3) {
            // Example: sort:mVar1:mVar2 (input=mVar1, output=mVar2)
            parseUnaryInstruction(str, in1, out);
            return new QuantileSortSPInstruction(new SimpleOperator(null), in1, out, opcode, str);
        } else if (parts.length == 4) {
            // Example: sort:mVar1:mVar2:mVar3 (input=mVar1, weights=mVar2, output=mVar3)
            in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
            parseUnaryInstruction(str, in1, in2, out);
            return new QuantileSortSPInstruction(new SimpleOperator(null), in1, in2, out, opcode, str);
        } else {
            throw new DMLRuntimeException("Invalid number of operands in instruction: " + str);
        }
    } else {
        throw new DMLRuntimeException("Unknown opcode while parsing a SortSPInstruction: " + str);
    }
}
Also used : SimpleOperator(org.apache.sysml.runtime.matrix.operators.SimpleOperator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
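
The sort parser distinguishes the unweighted and weighted forms solely by the number of operand fields (3 vs. 4), throwing DMLRuntimeException for any other count or an unknown opcode. The standalone sketch below isolates that length-based branching; the "sort" opcode string and the ':' delimiter follow the comments in the code above and are used here only for illustration.

// Illustrative sketch: branch on operand count the way QuantileSortSPInstruction.parseInstruction does.
public class SortParseSketch {

    static String describe(String str) {
        String[] parts = str.split(":");
        if (!parts[0].equalsIgnoreCase("sort"))
            throw new RuntimeException("Unknown opcode while parsing: " + str);
        if (parts.length == 3)        // sort:input:output
            return "unweighted sort of " + parts[1] + " into " + parts[2];
        else if (parts.length == 4)   // sort:input:weights:output
            return "weighted sort of " + parts[1] + " (weights " + parts[2] + ") into " + parts[3];
        else
            throw new RuntimeException("Invalid number of operands in instruction: " + str);
    }

    public static void main(String[] args) {
        System.out.println(describe("sort:mVar1:mVar2"));
        System.out.println(describe("sort:mVar1:mVar2:mVar3"));
    }
}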

Example 58 with DMLRuntimeException

Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.

Class RandSPInstruction, method generateRandData.

private void generateRandData(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    long lcols = sec.getScalarInput(cols).getLongValue();
    // step 1: generate pseudo-random seed if none was specified (one seed per invocation)
    long lSeed = seed;
    if (lSeed == DataGenOp.UNSPECIFIED_SEED)
        lSeed = DataGenOp.generateRandomSeed();
    if (LOG.isTraceEnabled())
        LOG.trace("Process RandSPInstruction rand with seed = " + lSeed + ".");
    // step 2: potential in-memory rand operations if applicable
    if (isMemAvail(lrows, lcols, sparsity, minValue, maxValue) && DMLScript.rtplatform != RUNTIME_PLATFORM.SPARK) {
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        MatrixBlock mb = MatrixBlock.randOperations(rgen, lSeed);
        sec.setMatrixOutput(output.getName(), mb, getExtendedOpcode());
        Statistics.decrementNoOfExecutedSPInst();
        return;
    }
    // step 3: seed generation
    JavaPairRDD<MatrixIndexes, Long> seedsRDD = null;
    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(lSeed);
    // overestimate for on disk, ensures hdfs block per partition
    double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(lrows, lcols, rowsInBlock, colsInBlock, sparsity);
    double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
    MatrixCharacteristics tmp = new MatrixCharacteristics(lrows, lcols, rowsInBlock, colsInBlock);
    long numBlocks = tmp.getNumBlocks();
    long numColBlocks = tmp.getNumColBlocks();
    // a) in-memory seed rdd construction
    if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
        ArrayList<Tuple2<MatrixIndexes, Long>> seeds = new ArrayList<>();
        for (long i = 0; i < numBlocks; i++) {
            long r = 1 + i / numColBlocks;
            long c = 1 + i % numColBlocks;
            MatrixIndexes indx = new MatrixIndexes(r, c);
            Long seedForBlock = bigrand.nextLong();
            seeds.add(new Tuple2<>(indx, seedForBlock));
        }
        // for load balancing: degree of parallelism such that ~128MB per partition
        int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
        // create seeds rdd
        seedsRDD = sec.getSparkContext().parallelizePairs(seeds, numPartitions);
    } else { // b) file-based seed rdd construction (for robustness wrt large number of blocks)
        Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        PrintWriter pw = null;
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            pw = new PrintWriter(fs.create(path));
            StringBuilder sb = new StringBuilder();
            for (long i = 0; i < numBlocks; i++) {
                sb.append(1 + i / numColBlocks);
                sb.append(',');
                sb.append(1 + i % numColBlocks);
                sb.append(',');
                sb.append(bigrand.nextLong());
                pw.println(sb.toString());
                sb.setLength(0);
            }
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            IOUtilFunctions.closeSilently(pw);
        }
        // for load balancing: degree of parallelism such that ~128MB per partition
        int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
        // create seeds rdd
        seedsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).mapToPair(new ExtractSeedTuple());
    }
    // step 4: execute rand instruction over seed input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = seedsRDD.mapToPair(new GenerateRandomBlock(lrows, lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdf, pdfParams));
    // step 5: output handling
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown(true)) {
        // note: we cannot compute the nnz from the sparsity because it would not reflect the
        // actual number of non-zeros, except for the extreme sparsity values 0 and 1.
        long lnnz = (sparsity == 0 || sparsity == 1) ? (long) (sparsity * lrows * lcols) : -1;
        mcOut.set(lrows, lcols, rowsInBlock, colsInBlock, lnnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) Tuple2(scala.Tuple2) FileSystem(org.apache.hadoop.fs.FileSystem) Well1024a(org.apache.commons.math3.random.Well1024a) PrintWriter(java.io.PrintWriter)
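
Two small pieces of arithmetic carry most of the seed construction above: the linear block index i is mapped to 1-based (row, column) block indexes by dividing and taking the modulo with the number of column blocks, and the partition count is chosen so that each partition holds roughly one HDFS block of estimated output, clamped to [1, numBlocks]. The sketch below reproduces that arithmetic with made-up dimensions; the dense 8-bytes-per-value size estimate and the 128MB HDFS block size are assumptions for illustration.

// Illustrative sketch of the seed/partition arithmetic in generateRandData (assumed example numbers).
public class SeedLayoutSketch {
    public static void main(String[] args) {
        long rows = 10_000, cols = 3_000;
        int rowsInBlock = 1000, colsInBlock = 1000;

        long numRowBlocks = (rows + rowsInBlock - 1) / rowsInBlock;   // 10
        long numColBlocks = (cols + colsInBlock - 1) / colsInBlock;   // 3
        long numBlocks = numRowBlocks * numColBlocks;                 // 30

        // linear index -> 1-based block coordinates, as in the in-memory seed construction
        for (long i = 0; i < numBlocks; i++) {
            long r = 1 + i / numColBlocks;
            long c = 1 + i % numColBlocks;
            if (i < 4 || i == numBlocks - 1)
                System.out.println("block " + i + " -> (" + r + "," + c + ")");
        }

        // partition count: about one HDFS block of (estimated) output per partition, clamped to [1, numBlocks]
        double totalSizeBytes = rows * cols * 8.0;    // assumed dense, 8 bytes per double
        double hdfsBlkSize = 128.0 * 1024 * 1024;     // assumed 128MB HDFS block size
        int numPartitions = (int) Math.max(Math.min(totalSizeBytes / hdfsBlkSize, numBlocks), 1);
        System.out.println("numPartitions = " + numPartitions);
    }
}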

Example 59 with DMLRuntimeException

Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.

Class RandSPInstruction, method parseInstruction.

public static RandSPInstruction parseInstruction(String str) {
    String[] s = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = s[0];
    DataGenMethod method = DataGenMethod.INVALID;
    if (opcode.equalsIgnoreCase(DataGen.RAND_OPCODE)) {
        method = DataGenMethod.RAND;
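        // 12 operands: rows, cols, rpb, cpb, min, max, sparsity, seed, dir, pdf, pdfParams, outvar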
        InstructionUtils.checkNumFields(str, 12);
    } else if (opcode.equalsIgnoreCase(DataGen.SEQ_OPCODE)) {
        method = DataGenMethod.SEQ;
        // 8 operands: rows, cols, rpb, cpb, from, to, incr, outvar
        InstructionUtils.checkNumFields(str, 8);
    } else if (opcode.equalsIgnoreCase(DataGen.SAMPLE_OPCODE)) {
        method = DataGenMethod.SAMPLE;
        // 7 operands: range, size, replace, seed, rpb, cpb, outvar
        InstructionUtils.checkNumFields(str, 7);
    }
    Operator op = null;
    // output is specified by the last operand
    CPOperand out = new CPOperand(s[s.length - 1]);
    if (method == DataGenMethod.RAND) {
        CPOperand rows = new CPOperand(s[1]);
        CPOperand cols = new CPOperand(s[2]);
        int rpb = Integer.parseInt(s[3]);
        int cpb = Integer.parseInt(s[4]);
        double minValue = !s[5].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? Double.valueOf(s[5]).doubleValue() : -1;
        double maxValue = !s[6].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? Double.valueOf(s[6]).doubleValue() : -1;
        double sparsity = !s[7].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? Double.valueOf(s[7]).doubleValue() : -1;
        long seed = !s[8].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? Long.valueOf(s[8]).longValue() : -1;
        String dir = s[9];
        String pdf = s[10];
        String pdfParams = !s[11].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? s[11] : null;
        return new RandSPInstruction(op, method, null, out, rows, cols, rpb, cpb, minValue, maxValue, sparsity, seed, dir, pdf, pdfParams, opcode, str);
    } else if (method == DataGenMethod.SEQ) {
        int rpb = Integer.parseInt(s[3]);
        int cpb = Integer.parseInt(s[4]);
        CPOperand from = new CPOperand(s[5]);
        CPOperand to = new CPOperand(s[6]);
        CPOperand incr = new CPOperand(s[7]);
        CPOperand in = null;
        return new RandSPInstruction(op, method, in, out, null, null, rpb, cpb, from, to, incr, opcode, str);
    } else if (method == DataGenMethod.SAMPLE) {
        double max = !s[1].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? Double.valueOf(s[1]) : 0;
        CPOperand rows = new CPOperand(s[2]);
        CPOperand cols = new CPOperand("1", ValueType.INT, DataType.SCALAR);
        boolean replace = (!s[3].contains(Lop.VARIABLE_NAME_PLACEHOLDER) && Boolean.valueOf(s[3]));
        long seed = Long.parseLong(s[4]);
        int rpb = Integer.parseInt(s[5]);
        int cpb = Integer.parseInt(s[6]);
        return new RandSPInstruction(op, method, null, out, rows, cols, rpb, cpb, max, replace, seed, opcode, str);
    } else
        throw new DMLRuntimeException("Unrecognized data generation method: " + method);
}
Also used : Operator(org.apache.sysml.runtime.matrix.operators.Operator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) DataGenMethod(org.apache.sysml.hops.Hop.DataGenMethod) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
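
Several rand operands (min, max, sparsity, seed, pdf params) may arrive either as literals or as placeholders referring to variables resolved at runtime, so the parser converts a field only when no placeholder marker is present and otherwise stores a -1 sentinel. The small helper sketched below captures that pattern in isolation; the "$" marker is a hypothetical stand-in for Lop.VARIABLE_NAME_PLACEHOLDER.

// Illustrative sketch of the placeholder-aware numeric parsing used in RandSPInstruction.parseInstruction.
public class PlaceholderParseSketch {

    // assumed placeholder marker; SystemML uses Lop.VARIABLE_NAME_PLACEHOLDER instead
    static final String PLACEHOLDER = "$";

    static double parseDoubleOrDefault(String field, double dflt) {
        // literal fields are parsed eagerly; placeholder fields are deferred and flagged with the default
        return !field.contains(PLACEHOLDER) ? Double.parseDouble(field) : dflt;
    }

    static long parseLongOrDefault(String field, long dflt) {
        return !field.contains(PLACEHOLDER) ? Long.parseLong(field) : dflt;
    }

    public static void main(String[] args) {
        System.out.println(parseDoubleOrDefault("0.05", -1));   // literal sparsity -> 0.05
        System.out.println(parseDoubleOrDefault("$sp", -1));    // deferred variable -> -1.0 sentinel
        System.out.println(parseLongOrDefault("42", -1));       // literal seed -> 42
    }
}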

Example 60 with DMLRuntimeException

Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by Apache.

Class RandSPInstruction, method generateSample.

/**
 * Helper function to construct a sample.
 *
 * @param sec spark execution context
 */
private void generateSample(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    if (maxValue < lrows && !replace)
        throw new DMLRuntimeException("Sample (size=" + rows + ") larger than population (size=" + maxValue + ") can only be generated with replacement.");
    if (LOG.isTraceEnabled())
        LOG.trace("Process RandSPInstruction sample with range=" + maxValue + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
    // sampling rate that guarantees a sample of size >= lrows 99.99% of the time.
    double fraction = SamplingUtils.computeFractionForSampleSize((int) lrows, UtilFunctions.toLong(maxValue), replace);
    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(seed);
    // divide the population range across numPartitions by creating SampleTasks
    double hdfsBlockSize = InfrastructureAnalyzer.getHDFSBlockSize();
    long outputSize = MatrixBlock.estimateSizeDenseInMemory(lrows, 1);
    int numPartitions = (int) Math.ceil((double) outputSize / hdfsBlockSize);
    long partitionSize = (long) Math.ceil(maxValue / numPartitions);
    ArrayList<SampleTask> offsets = new ArrayList<>();
    long st = 1;
    while (st <= maxValue) {
        SampleTask s = new SampleTask();
        s.range_start = st;
        s.seed = bigrand.nextLong();
        offsets.add(s);
        st = st + partitionSize;
    }
    JavaRDD<SampleTask> offsetRDD = sec.getSparkContext().parallelize(offsets, numPartitions);
    // Construct the sample in a distributed manner
    JavaRDD<Double> rdd = offsetRDD.flatMap((new GenerateSampleBlock(replace, fraction, (long) maxValue, partitionSize)));
    // Randomize the sampled elements
    JavaRDD<Double> randomizedRDD = rdd.mapToPair(new AttachRandom()).sortByKey().values();
    // Trim the sampled list to required size & attach matrix indexes to randomized elements
    JavaPairRDD<MatrixIndexes, MatrixCell> miRDD = randomizedRDD.zipWithIndex().filter(new TrimSample(lrows)).mapToPair(new Double2MatrixCell());
    MatrixCharacteristics mcOut = new MatrixCharacteristics(lrows, 1, rowsInBlock, colsInBlock, lrows);
    // Construct BinaryBlock representation
    JavaPairRDD<MatrixIndexes, MatrixBlock> mbRDD = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), miRDD, mcOut, true);
    sec.getMatrixCharacteristics(output.getName()).setNonZeros(lrows);
    sec.setRDDHandleForVariable(output.getName(), mbRDD);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Well1024a(org.apache.commons.math3.random.Well1024a)
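
The sample construction splits the population range [1, maxValue] into contiguous chunks of partitionSize, creating one seeded SampleTask per chunk, with the number of partitions derived from the estimated dense size of the n x 1 output. The standalone sketch below walks through that partitioning with assumed numbers; the 8-bytes-per-value estimate and the 128MB HDFS block size are illustrative assumptions.

// Illustrative sketch of the range partitioning performed in generateSample (assumed example numbers).
import java.util.ArrayList;
import java.util.List;

public class SamplePartitionSketch {
    public static void main(String[] args) {
        long sampleSize = 100_000_000;           // lrows: requested sample size
        double populationSize = 250_000_000;     // maxValue: population range [1, maxValue]

        // assumed dense in-memory estimate of the n x 1 output: ~8 bytes per value
        long outputSize = sampleSize * 8;
        double hdfsBlockSize = 128.0 * 1024 * 1024;   // assumed 128MB
        int numPartitions = (int) Math.ceil(outputSize / hdfsBlockSize);
        long partitionSize = (long) Math.ceil(populationSize / numPartitions);

        // one task per contiguous chunk of the population range, as in the while loop above
        List<Long> rangeStarts = new ArrayList<>();
        for (long st = 1; st <= populationSize; st += partitionSize)
            rangeStarts.add(st);

        System.out.println("numPartitions = " + numPartitions + ", partitionSize = " + partitionSize);
        System.out.println("task range starts: " + rangeStarts);
    }
}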

Aggregations

DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 579
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 104
IOException (java.io.IOException): 102
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 85
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 78
ArrayList (java.util.ArrayList): 75
CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 49
Path (org.apache.hadoop.fs.Path): 43
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 40
ExecutorService (java.util.concurrent.ExecutorService): 38
Pointer (jcuda.Pointer): 37
Future (java.util.concurrent.Future): 35
CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer): 30
MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat): 26
FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 26
FileSystem (org.apache.hadoop.fs.FileSystem): 25
JobConf (org.apache.hadoop.mapred.JobConf): 23
Operator (org.apache.sysml.runtime.matrix.operators.Operator): 22
KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject): 20
SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 19