Search in sources :

Example 6 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.

the class RunMRJobs method executeInMemoryDataGenOperations.

/**
 * Runs the given data generation instructions directly in memory instead of as a MapReduce job.
 *
 * <p>Every parsed instruction that is a {@code RandInstruction} or {@code SeqInstruction} produces one
 * matrix block; that block is handed to each output matrix whose result index equals the
 * instruction's output index. Instructions of any other type are ignored.</p>
 *
 * @param inst MR job instruction providing the result indices and output infos
 * @param randInst data generation instructions as a single string
 * @param outputMatrices output matrix objects, positionally aligned with the result indices
 * @return job return holding the characteristics of the generated outputs; always flagged successful
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] outputStats = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] instructions = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIndices = inst.getIv_resultIndices();
    for (DataGenMRInstruction current : instructions) {
        // step 1: generate the block for the supported instruction types
        MatrixBlock generated;
        if (current instanceof RandInstruction) {
            // CP Rand block operation
            RandInstruction rand = (RandInstruction) current;
            RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
                    rand.getProbabilityDensityFunction(),
                    (int) rand.getRows(), (int) rand.getCols(),
                    rand.getRowsInBlock(), rand.getColsInBlock(),
                    rand.getSparsity(), rand.getMinValue(), rand.getMaxValue(),
                    rand.getPdfParams());
            generated = MatrixBlock.randOperations(generator, rand.getSeed());
        } else if (current instanceof SeqInstruction) {
            SeqInstruction seq = (SeqInstruction) current;
            generated = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);
        } else {
            // neither rand nor seq: nothing to generate
            continue;
        }
        // step 2: hand the block to every output that refers to this instruction
        for (int pos = 0; pos < resultIndices.length; pos++) {
            if (current.output != resultIndices[pos]) {
                continue;
            }
            MatrixObject target = outputMatrices[pos];
            target.acquireModify(generated);
            target.release();
            outputStats[pos] = new MatrixCharacteristics(generated.getNumRows(), generated.getNumColumns(), current.getRowsInBlock(), current.getColsInBlock(), generated.getNonZeros());
        }
    }
    return new JobReturn(outputStats, inst.getOutputInfos(), true);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SeqInstruction(org.apache.sysml.runtime.instructions.mr.SeqInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 7 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.

the class DataGenMR method runJob.

/**
 * <p>Starts a data generation (rand/seq) MapReduce job which will produce one or more generated objects.</p>
 *
 * <p>For every data gen instruction a temporary text input file is written with one line per output
 * block (rand: block row index, block column index, block rows, block columns, seed; seq: block row
 * index, block column index, from, to, increment). Once these files have been written, they are
 * deleted again after the job configuration/execution section, whether or not that section succeeded.</p>
 *
 * @param inst MR job instruction
 * @param dataGenInstructions array of data gen instructions
 * @param instructionsInMapper instructions in mapper
 * @param aggInstructionsInReducer aggregate instructions in reducer
 * @param otherInstructionsInReducer other instructions in reducer
 * @param numReducers number of reducers
 * @param replication file replication
 * @param resultIndexes result indexes for each random object
 * @param dimsUnknownFilePrefix file path prefix when dimensions unknown
 * @param outputs output file for each random object
 * @param outputInfos output information for each random object
 * @return matrix characteristics for each random object
 * @throws Exception if Exception occurs
 */
public static JobReturn runJob(MRJobInstruction inst, String[] dataGenInstructions, String instructionsInMapper, String aggInstructionsInReducer, String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes, String dimsUnknownFilePrefix, String[] outputs, OutputInfo[] outputInfos) throws Exception {
    JobConf job = new JobConf(DataGenMR.class);
    job.setJobName("DataGen-MR");
    // whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClass(job, true);
    // each data gen instruction acts as its own job input, indexed by its position
    byte[] realIndexes = new byte[dataGenInstructions.length];
    for (byte b = 0; b < realIndexes.length; b++) {
        realIndexes[b] = b;
    }
    String[] inputs = new String[dataGenInstructions.length];
    InputInfo[] inputInfos = new InputInfo[dataGenInstructions.length];
    long[] rlens = new long[dataGenInstructions.length];
    long[] clens = new long[dataGenInstructions.length];
    int[] brlens = new int[dataGenInstructions.length];
    int[] bclens = new int[dataGenInstructions.length];
    FileSystem fs = FileSystem.get(job);
    StringBuilder dataGenInsBuilder = new StringBuilder();
    int numblocks = 0;
    int maxbrlen = -1, maxbclen = -1;
    double maxsparsity = -1;
    for (int i = 0; i < dataGenInstructions.length; i++) {
        dataGenInsBuilder.append(Lop.INSTRUCTION_DELIMITOR).append(dataGenInstructions[i]);
        MRInstruction mrins = MRInstructionParser.parseSingleInstruction(dataGenInstructions[i]);
        MRType mrtype = mrins.getMRInstructionType();
        DataGenMRInstruction genInst = (DataGenMRInstruction) mrins;
        rlens[i] = genInst.getRows();
        clens[i] = genInst.getCols();
        brlens[i] = genInst.getRowsInBlock();
        bclens[i] = genInst.getColsInBlock();
        maxbrlen = Math.max(maxbrlen, brlens[i]);
        maxbclen = Math.max(maxbclen, bclens[i]);
        if (mrtype == MRType.Rand) {
            RandInstruction randInst = (RandInstruction) mrins;
            inputs[i] = LibMatrixDatagen.generateUniqueSeedPath(genInst.getBaseDir());
            maxsparsity = Math.max(maxsparsity, randInst.getSparsity());
            try (PrintWriter pw = new PrintWriter(fs.create(new Path(inputs[i])))) {
                // for obj reuse and preventing repeated buffer re-allocations
                StringBuilder sb = new StringBuilder();
                // seed generation
                Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(randInst.getSeed());
                // Math.max(..., 1) ensures at least one line is written even for a zero dimension
                for (long r = 0; r < Math.max(rlens[i], 1); r += brlens[i]) {
                    long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));
                    for (long c = 0; c < Math.max(clens[i], 1); c += bclens[i]) {
                        long curBlockColSize = Math.min(bclens[i], (clens[i] - c));
                        // line format: blockRowIndex,blockColIndex,blockRows,blockCols,seed (1-based block indexes)
                        sb.append((r / brlens[i]) + 1);
                        sb.append(',');
                        sb.append((c / bclens[i]) + 1);
                        sb.append(',');
                        sb.append(curBlockRowSize);
                        sb.append(',');
                        sb.append(curBlockColSize);
                        sb.append(',');
                        sb.append(bigrand.nextLong());
                        pw.println(sb.toString());
                        sb.setLength(0);
                        numblocks++;
                    }
                }
            }
            inputInfos[i] = InputInfo.TextCellInputInfo;
        } else if (mrtype == MRType.Seq) {
            SeqInstruction seqInst = (SeqInstruction) mrins;
            inputs[i] = genInst.getBaseDir() + System.currentTimeMillis() + ".seqinput";
            // always dense
            maxsparsity = 1.0;
            double from = seqInst.fromValue;
            double to = seqInst.toValue;
            double incr = seqInst.incrValue;
            // handle default 1 to -1 for special case of from>to
            incr = LibMatrixDatagen.updateSeqIncr(from, to, incr);
            // Correctness checks on (from, to, incr)
            boolean neg = (from > to);
            if (incr == 0) {
                throw new DMLRuntimeException("Invalid value for \"increment\" in seq().");
            }
            if (neg != (incr < 0)) {
                throw new DMLRuntimeException("Wrong sign for the increment in a call to seq()");
            }
            // Compute the number of rows in the sequence
            long numrows = UtilFunctions.getSeqLength(from, to, incr);
            if (rlens[i] > 0) {
                if (numrows != rlens[i]) {
                    throw new DMLRuntimeException("Unexpected error while processing sequence instruction. Expected number of rows does not match given number: " + rlens[i] + " != " + numrows);
                }
            } else {
                rlens[i] = numrows;
            }
            if (clens[i] > 0 && clens[i] != 1) {
                throw new DMLRuntimeException("Unexpected error while processing sequence instruction. Number of columns (" + clens[i] + ") must be equal to 1.");
            } else {
                clens[i] = 1;
            }
            try (PrintWriter pw = new PrintWriter(fs.create(new Path(inputs[i])))) {
                StringBuilder sb = new StringBuilder();
                double temp = from;
                double block_from, block_to;
                for (long r = 0; r < rlens[i]; r += brlens[i]) {
                    long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));
                    // block (bid_i,bid_j) generates a sequence from the interval [block_from, block_to] (inclusive of both end points of the interval)
                    long bid_i = ((r / brlens[i]) + 1);
                    long bid_j = 1;
                    block_from = temp;
                    block_to = temp + (curBlockRowSize - 1) * incr;
                    // next block starts from here
                    temp = block_to + incr;
                    // line format: blockRowIndex,blockColIndex,from,to,incr
                    sb.append(bid_i);
                    sb.append(',');
                    sb.append(bid_j);
                    sb.append(',');
                    sb.append(block_from);
                    sb.append(',');
                    sb.append(block_to);
                    sb.append(',');
                    sb.append(incr);
                    pw.println(sb.toString());
                    sb.setLength(0);
                    numblocks++;
                }
            }
            inputInfos[i] = InputInfo.TextCellInputInfo;
        } else {
            throw new DMLRuntimeException("Unexpected Data Generation Instruction Type: " + mrtype);
        }
    }
    // remove the first ","
    String dataGenInsStr = dataGenInsBuilder.substring(1);
    RunningJob runjob;
    MatrixCharacteristics[] stats;
    try {
        // set up the block size
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
        // set up the input files and their format information
        MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false, ConvertTarget.BLOCK);
        // set up the dimensions of input matrices
        MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
        MRJobConfiguration.setDimsUnknownFilePrefix(job, dimsUnknownFilePrefix);
        // set up the block size
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
        // set up the rand Instructions
        MRJobConfiguration.setRandInstructions(job, dataGenInsStr);
        // set up unary instructions that will perform in the mapper
        MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
        // set up the aggregate instructions that will happen in the combiner and reducer
        MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);
        // set up the instructions that will happen in the reducer, after the aggregation instructions
        MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);
        // set up the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
        // set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        // set up custom map/reduce configurations
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
        // determine degree of parallelism (nmappers: 1<=n<=capacity)
        // TODO use maxsparsity whenever we have a way of generating sparse rand data
        int capacity = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        long dfsblocksize = InfrastructureAnalyzer.getHDFSBlockSize();
        // correction max number of mappers on yarn clusters
        if (InfrastructureAnalyzer.isYarnEnabled()) {
            capacity = (int) Math.max(capacity, YarnClusterAnalyzer.getNumCores());
        }
        // Estimate in long arithmetic: the int product 8 * maxbrlen * maxbclen overflows for large
        // block sizes, and the quotient must be clamped to capacity before narrowing to int.
        // (factor 8 is presumably the byte size of one dense double cell)
        long estimatedMappers = 8L * maxbrlen * maxbclen * numblocks / dfsblocksize;
        int nmapers = (int) Math.max(Math.min(estimatedMappers, capacity), 1);
        job.setNumMapTasks(nmapers);
        // set up what matrices are needed to pass from the mapper to reducer
        HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, otherInstructionsInReducer, resultIndexes);
        MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, null, otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
        stats = ret.stats;
        // set up the number of reducers
        MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);
        // print the complete MRJob instruction
        if (LOG.isTraceEnabled()) {
            inst.printCompleteMRJobInstruction(stats);
        }
        // Update resultDimsUnknown based on computed "stats"
        byte[] resultDimsUnknown = new byte[resultIndexes.length];
        for (int i = 0; i < resultIndexes.length; i++) {
            boolean unknown = stats[i].getRows() == -1 || stats[i].getCols() == -1;
            resultDimsUnknown[i] = unknown ? (byte) 1 : (byte) 0;
        }
        // guard against a missing mapper instruction string, like the null check on aggInstructionsInReducer below
        boolean mayContainCtable = instructionsInMapper != null
                && (instructionsInMapper.contains("ctabletransform") || instructionsInMapper.contains("groupedagg"));
        // set up the multiple output files, and their format information
        MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true, mayContainCtable);
        // configure mapper and the mapper output key value pairs
        job.setMapperClass(DataGenMapper.class);
        if (numReducers == 0) {
            job.setMapOutputKeyClass(Writable.class);
            job.setMapOutputValueClass(Writable.class);
        } else {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixBlock.class);
        }
        // set up combiner
        if (numReducers != 0 && aggInstructionsInReducer != null && !aggInstructionsInReducer.isEmpty()) {
            job.setCombinerClass(GMRCombiner.class);
        }
        // configure reducer
        job.setReducerClass(GMRReducer.class);
        // set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);
        runjob = JobClient.runJob(job);
        /* Process different counters */
        Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
        for (int i = 0; i < resultIndexes.length; i++) {
            // number of non-zeros
            stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        }
        String dir = dimsUnknownFilePrefix + "/" + runjob.getID().toString() + "_dimsFile";
        stats = MapReduceTool.processDimsFiles(dir, stats);
        MapReduceTool.deleteFileIfExistOnHDFS(dir);
    } finally {
        // always remove the temporary per-instruction input files
        for (String input : inputs) {
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(input), job);
        }
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
Also used : Group(org.apache.hadoop.mapred.Counters.Group) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) GMRCombiner(org.apache.sysml.runtime.matrix.mapred.GMRCombiner) FileSystem(org.apache.hadoop.fs.FileSystem) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) JobConf(org.apache.hadoop.mapred.JobConf) PrintWriter(java.io.PrintWriter) Path(org.apache.hadoop.fs.Path) DMLConfig(org.apache.sysml.conf.DMLConfig) SeqInstruction(org.apache.sysml.runtime.instructions.mr.SeqInstruction) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) MRType(org.apache.sysml.runtime.instructions.mr.MRInstruction.MRType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixChar_N_ReducerGroups(org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups) RunningJob(org.apache.hadoop.mapred.RunningJob) Well1024a(org.apache.commons.math3.random.Well1024a)

Example 8 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.

the class DataGenMapper method map.

/**
 * Generates, for each data gen instruction, one matrix block from the given input record and
 * passes it on to {@code specialOperationsForActualMap}.
 *
 * <p>The record is a comma-separated line. For rand instructions its fields are read as block row
 * number, block column number, block row size, block column size and seed; for seq instructions
 * as block row number, block column number, from, to and increment.</p>
 *
 * @param key input key (not read by this method)
 * @param valueString input record; has to be Text type
 * @param out output collector handed to the per-instruction map operations
 * @param reporter reporter used for the map time counter
 * @throws IOException if block generation fails or the instruction is neither rand nor seq
 */
@Override
public void map(Writable key, Writable valueString, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException {
    cachedReporter = reporter;
    long start = System.currentTimeMillis();
    // for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        DataGenMRInstruction genInst = dataGen_instructions.get(i);
        if (genInst.getDataGenMethod() == DataGenMethod.RAND) {
            RandInstruction randInst = (RandInstruction) genInst;
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            int blockRowSize = Integer.parseInt(params[2]);
            int blockColSize = Integer.parseInt(params[3]);
            long seed = Long.parseLong(params[4]);
            double minValue = randInst.getMinValue();
            double maxValue = randInst.getMaxValue();
            double sparsity = randInst.getSparsity();
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale
            // (e.g. a Turkish locale would map 'I' to a dotless 'ı')
            String pdf = randInst.getProbabilityDensityFunction().toLowerCase(java.util.Locale.ROOT);
            // rand data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                // the generator covers exactly this one block, so matrix size == block size
                RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, blockRowSize, blockColSize, blockRowSize, blockColSize, sparsity, minValue, maxValue, randInst.getPdfParams());
                block[i].randOperationsInPlace(rgen, null, seed);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else if (genInst.getDataGenMethod() == DataGenMethod.SEQ) {
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            double from = Double.parseDouble(params[2]);
            double to = Double.parseDouble(params[3]);
            double incr = Double.parseDouble(params[4]);
            // handle default 1 to -1 for special case of from>to
            incr = LibMatrixDatagen.updateSeqIncr(from, to, incr);
            // sequence data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                block[i].seqOperationsInPlace(from, to, incr);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else {
            throw new IOException("Unknown data generation instruction: " + genInst.toString());
        }
        // put the input in the cache
        cachedValues.reset();
        cachedValues.set(genInst.output, indexes[i], block[i]);
        // special operations for individual map type
        specialOperationsForActualMap(i, out, reporter);
    }
    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) IOException(java.io.IOException) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 9 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.

the class MRJobConfiguration method computeMatrixCharacteristics.

/**
 * Derives the matrix characteristics of all matrices that occur in a job by propagating the input
 * dimensions through the data gen, mapper, reblock, aggregate, aggregate-binary and reducer
 * instructions (in that order), records the characteristics of intermediates, mapper outputs and
 * results in the job configuration, and computes the number of reducer groups.
 *
 * NOTE: this method needs to be in-sync with MRBaseForCommonInstructions.processOneInstruction,
 * otherwise, the latter will potentially fail with missing dimension information.
 *
 * @param job job configuration
 * @param inputIndexes array of byte indexes
 * @param dataGenInstructions data gen instructions as a string
 * @param instructionsInMapper instruction in mapper as a string
 * @param reblockInstructions reblock instructions as a string
 * @param aggInstructionsInReducer aggregate instructions in reducer as a string
 * @param aggBinInstructions binary aggregate instructions as a string (not read by this method;
 *        the aggregate binary instructions are obtained from the job configuration instead)
 * @param otherInstructionsInReducer other instructions in reducer as a string
 * @param resultIndexes array of byte result indexes
 * @param mapOutputIndexes set of map output indexes
 * @param forMMCJ if true, the number of reducer groups is the number of column blocks of the first
 *        input of the aggregate binary instruction; if false, it is computed from the block grids
 *        of the map outputs
 * @return characteristics of the result matrices together with the number of reducer groups
 */
public static MatrixChar_N_ReducerGroups computeMatrixCharacteristics(JobConf job, byte[] inputIndexes, String dataGenInstructions, String instructionsInMapper, String reblockInstructions, String aggInstructionsInReducer, String aggBinInstructions, String otherInstructionsInReducer, byte[] resultIndexes, HashSet<Byte> mapOutputIndexes, boolean forMMCJ) {
    HashSet<Byte> intermediateMatrixIndexes = new HashSet<>();
    // index -> characteristics; extended by every computeDimension call below
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<>();
    // seed the map with the characteristics of the job inputs as stored in the job configuration
    for (byte i : inputIndexes) {
        MatrixCharacteristics dim = new MatrixCharacteristics(getNumRows(job, i), getNumColumns(job, i), getNumRowsPerBlock(job, i), getNumColumnsPerBlock(job, i), getNumNonZero(job, i));
        dims.put(i, dim);
    }
    // data gen instructions
    DataGenMRInstruction[] dataGenIns = null;
    dataGenIns = MRInstructionParser.parseDataGenInstructions(dataGenInstructions);
    if (dataGenIns != null) {
        for (DataGenMRInstruction ins : dataGenIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
        }
    }
    // mapper instructions: for the listed instruction types, one input is additionally
    // registered as intermediate matrix in the job configuration
    MRInstruction[] insMapper = MRInstructionParser.parseMixedInstructions(instructionsInMapper);
    if (insMapper != null) {
        for (MRInstruction ins : insMapper) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof AppendMInstruction) {
                AppendMInstruction tempIns = (AppendMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AppendGInstruction) {
                AppendGInstruction tempIns = (AppendGInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof BinaryMInstruction) {
                BinaryMInstruction tempIns = (BinaryMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AggregateBinaryInstruction) {
                AggregateBinaryInstruction tempIns = (AggregateBinaryInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                // TODO
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof MapMultChainInstruction) {
                MapMultChainInstruction tempIns = (MapMultChainInstruction) ins;
                // NOTE(review): registers input1 with the characteristics of input2, unlike the
                // other branches which use the same index for both - confirm this is intended
                setIntermediateMatrixCharactristics(job, tempIns.getInput1(), dims.get(tempIns.getInput2()));
                intermediateMatrixIndexes.add(tempIns.getInput1());
            } else if (ins instanceof PMMJMRInstruction) {
                PMMJMRInstruction tempIns = (PMMJMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input2, dims.get(tempIns.input2));
                intermediateMatrixIndexes.add(tempIns.input2);
            }
        }
    }
    // reblock instructions
    ReblockInstruction[] reblockIns = MRInstructionParser.parseReblockInstructions(reblockInstructions);
    if (reblockIns != null) {
        for (ReblockInstruction ins : reblockIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
            setMatrixCharactristicsForReblock(job, ins.output, dims.get(ins.output));
        }
    }
    // aggregate instructions
    Instruction[] aggIns = MRInstructionParser.parseAggregateInstructions(aggInstructionsInReducer);
    if (aggIns != null) {
        for (Instruction ins : aggIns) {
            MatrixCharacteristics.computeDimension(dims, (MRInstruction) ins);
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            MRInstruction mrins = (MRInstruction) ins;
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == mrins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, mrins.output, dims.get(mrins.output));
                intermediateMatrixIndexes.add(mrins.output);
            }
        }
    }
    long numReduceGroups = 0;
    // aggregate binary instructions are taken from the job configuration, not from the aggBinInstructions parameter
    AggregateBinaryInstruction[] aggBinIns = getAggregateBinaryInstructions(job);
    if (aggBinIns != null) {
        for (AggregateBinaryInstruction ins : aggBinIns) {
            // both input characteristics are read before computeDimension is applied for this instruction
            MatrixCharacteristics dim1 = dims.get(ins.input1);
            MatrixCharacteristics dim2 = dims.get(ins.input2);
            setMatrixCharactristicsForBinAgg(job, ins.input1, dim1);
            setMatrixCharactristicsForBinAgg(job, ins.input2, dim2);
            MatrixCharacteristics.computeDimension(dims, ins);
            if (// there will be only one aggbin operation for MMCJ
            forMMCJ)
                // number of column blocks of the first input
                numReduceGroups = (long) Math.ceil((double) dim1.getCols() / (double) dim1.getColsPerBlock());
        }
    }
    if (!forMMCJ) {
        // store the skylines
        // (xs, ys) hold block-grid sizes (row blocks, column blocks) of map outputs that are
        // not contained in the grid of another map output
        ArrayList<Long> xs = new ArrayList<>(mapOutputIndexes.size());
        ArrayList<Long> ys = new ArrayList<>(mapOutputIndexes.size());
        for (byte idx : mapOutputIndexes) {
            MatrixCharacteristics dim = dims.get(idx);
            // number of row blocks / column blocks of this map output
            long x = (long) Math.ceil((double) dim.getRows() / (double) dim.getRowsPerBlock());
            long y = (long) Math.ceil((double) dim.getCols() / (double) dim.getColsPerBlock());
            int i = 0;
            boolean toadd = true;
            while (i < xs.size()) {
                if ((x >= xs.get(i) && y > ys.get(i)) || (x > xs.get(i) && y >= ys.get(i))) {
                    // remove any included x's and y's
                    // (i is not advanced: the next entry moves into position i)
                    xs.remove(i);
                    ys.remove(i);
                } else if (// if included in others, stop
                x <= xs.get(i) && y <= ys.get(i)) {
                    toadd = false;
                    break;
                } else
                    i++;
            }
            if (toadd) {
                xs.add(x);
                ys.add(y);
            }
        }
        // sort by x
        TreeMap<Long, Long> map = new TreeMap<>();
        for (int i = 0; i < xs.size(); i++) map.put(xs.get(i), ys.get(i));
        numReduceGroups = 0;
        // compute area
        // sums (x - previous x) * y over the entries in ascending x order
        long prev = 0;
        for (Entry<Long, Long> e : map.entrySet()) {
            numReduceGroups += (e.getKey() - prev) * e.getValue();
            prev = e.getKey();
        }
    }
    // reducer instructions
    MRInstruction[] insReducer = MRInstructionParser.parseMixedInstructions(otherInstructionsInReducer);
    if (insReducer != null) {
        for (MRInstruction ins : insReducer) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof RemoveEmptyMRInstruction) {
                RemoveEmptyMRInstruction tempIns = (RemoveEmptyMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            }
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == ins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, ins.output, dims.get(ins.output));
                intermediateMatrixIndexes.add(ins.output);
            }
        }
    }
    // publish the collected intermediate indexes and the mapper output characteristics to the job configuration
    setIntermediateMatrixIndexes(job, intermediateMatrixIndexes);
    for (byte tag : mapOutputIndexes) setMatrixCharactristicsForMapperOutput(job, tag, dims.get(tag));
    // collect the result characteristics, positionally aligned with resultIndexes
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    MatrixCharacteristics resultDims;
    for (int i = 0; i < resultIndexes.length; i++) {
        resultDims = dims.get(resultIndexes[i]);
        stats[i] = resultDims;
        setMatrixCharactristicsForOutput(job, resultIndexes[i], stats[i]);
    }
    return new MatrixChar_N_ReducerGroups(stats, numReduceGroups);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) CSVWriteInstruction(org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction) GroupedAggregateInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction) AggregateInstruction(org.apache.sysml.runtime.instructions.mr.AggregateInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) AppendMInstruction(org.apache.sysml.runtime.instructions.mr.AppendMInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) CM_N_COVInstruction(org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) AppendGInstruction(org.apache.sysml.runtime.instructions.mr.AppendGInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) UnaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) 
PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) HashSet(java.util.HashSet) AppendGInstruction(org.apache.sysml.runtime.instructions.mr.AppendGInstruction) TreeMap(java.util.TreeMap) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) AppendMInstruction(org.apache.sysml.runtime.instructions.mr.AppendMInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)

Example 10 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project systemml by apache.

the class RunMRJobs method executeInMemoryDataGenOperations.

/**
 * Executes the data-generation instructions encoded in {@code randInst} and
 * places each generated block into the matching output matrix.
 *
 * <p>The instruction string is parsed into {@link DataGenMRInstruction}s. For
 * every {@link RandInstruction} or {@link SeqInstruction} one
 * {@link MatrixBlock} is generated; any other instruction type is ignored.
 * The block is then handed (via {@code acquireModify} followed by
 * {@code release}) to every {@code outputMatrices[i]} whose result index
 * {@code inst.getIv_resultIndices()[i]} equals the instruction's
 * {@code output}, and the matrix characteristics for that position are
 * recorded.
 *
 * @param inst           job instruction supplying the result indices and output infos
 * @param randInst       encoded data-generation instructions to parse
 * @param outputMatrices output matrices, indexed in parallel with the result indices
 * @return a {@link JobReturn} built from the collected characteristics (entries
 *         stay {@code null} for positions no instruction matched),
 *         {@code inst.getOutputInfos()} and the flag {@code true}
 *         (presumably a success marker; confirm against {@code JobReturn})
 */
private static JobReturn executeInMemoryDataGenOperations(MRJobInstruction inst, String randInst, MatrixObject[] outputMatrices) {
    MatrixCharacteristics[] stats = new MatrixCharacteristics[outputMatrices.length];
    DataGenMRInstruction[] parsed = MRInstructionParser.parseDataGenInstructions(randInst);
    byte[] resultIndices = inst.getIv_resultIndices();

    for (DataGenMRInstruction current : parsed) {
        if (current instanceof RandInstruction) {
            // CP Rand block operation
            RandInstruction rand = (RandInstruction) current;
            RandomMatrixGenerator generator = LibMatrixDatagen.createRandomMatrixGenerator(
                    rand.getProbabilityDensityFunction(),
                    (int) rand.getRows(),
                    (int) rand.getCols(),
                    rand.getRowsInBlock(),
                    rand.getColsInBlock(),
                    rand.getSparsity(),
                    rand.getMinValue(),
                    rand.getMaxValue(),
                    rand.getPdfParams());
            MatrixBlock block = MatrixBlock.randOperations(generator, rand.getSeed());

            // Publish the block to every output slot bound to this instruction.
            for (int pos = 0; pos < resultIndices.length; pos++) {
                if (rand.output != resultIndices[pos]) {
                    continue;
                }
                MatrixObject target = outputMatrices[pos];
                target.acquireModify(block);
                target.release();
                stats[pos] = new MatrixCharacteristics(
                        block.getNumRows(),
                        block.getNumColumns(),
                        rand.getRowsInBlock(),
                        rand.getColsInBlock(),
                        block.getNonZeros());
            }
            continue;
        }

        if (current instanceof SeqInstruction) {
            SeqInstruction seq = (SeqInstruction) current;
            MatrixBlock block = MatrixBlock.seqOperations(seq.fromValue, seq.toValue, seq.incrValue);

            // Publish the block to every output slot bound to this instruction.
            for (int pos = 0; pos < resultIndices.length; pos++) {
                if (seq.output != resultIndices[pos]) {
                    continue;
                }
                MatrixObject target = outputMatrices[pos];
                target.acquireModify(block);
                target.release();
                stats[pos] = new MatrixCharacteristics(
                        block.getNumRows(),
                        block.getNumColumns(),
                        seq.getRowsInBlock(),
                        seq.getColsInBlock(),
                        block.getNonZeros());
            }
        }
    }

    return new JobReturn(stats, inst.getOutputInfos(), true);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SeqInstruction(org.apache.sysml.runtime.instructions.mr.SeqInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

DataGenMRInstruction (org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction)14 MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction)8 RandInstruction (org.apache.sysml.runtime.instructions.mr.RandInstruction)8 HashSet (java.util.HashSet)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)6 PMMJMRInstruction (org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction)6 ReblockInstruction (org.apache.sysml.runtime.instructions.mr.ReblockInstruction)6 RemoveEmptyMRInstruction (org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction)6 SeqInstruction (org.apache.sysml.runtime.instructions.mr.SeqInstruction)5 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 AggregateBinaryInstruction (org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction)4 AggregateInstruction (org.apache.sysml.runtime.instructions.mr.AggregateInstruction)4 BinaryMInstruction (org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)4 CM_N_COVInstruction (org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction)4 CSVReblockInstruction (org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction)4 GroupedAggregateInstruction (org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction)4 MMTSJMRInstruction (org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction)4 MapMultChainInstruction (org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction)4 UnaryMRInstructionBase (org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase)4