Search in sources :

Example 96 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class MRBaseForCommonInstructions method processOneInstruction.

/**
 * Executes a single MR instruction, supplying the block sizes (rows per block, columns per
 * block) taken from the {@code dimensions} entry of the instruction's input.
 *
 * <p>The {@code instanceof} branches are evaluated in order, so the specific instruction
 * types tested first take precedence over the generic {@code UnaryMRInstructionBase} and
 * {@code BinaryMRInstructionBase} branches near the end. Instructions matching none of the
 * branches are executed with block sizes {@code -1, -1}.
 *
 * @param ins          instruction to execute
 * @param valueClass   forwarded unchanged to {@code processInstruction}
 * @param cachedValues forwarded unchanged to {@code processInstruction}
 * @param tempValue    forwarded unchanged to {@code processInstruction}
 * @param zeroInput    forwarded unchanged to {@code processInstruction}
 * @throws DMLRuntimeException if a branch that requires dimensions finds no entry in
 *         {@code dimensions} for the relevant input (or output, for matrix reshape);
 *         presumably also propagated from {@code processInstruction} itself
 */
protected void processOneInstruction(MRInstruction ins, Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput) throws DMLRuntimeException {
    if (ins instanceof AggregateBinaryInstruction) {
        // block sizes come from the first input
        byte input = ((AggregateBinaryInstruction) ins).input1;
        MatrixCharacteristics dim = dimensions.get(input);
        if (dim == null)
            throw new DMLRuntimeException("dimension for instruction " + ins + "  is unset!!!");
        ins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dim.getRowsPerBlock(), dim.getColsPerBlock());
    } else if (ins instanceof ZeroOutInstruction || ins instanceof AggregateUnaryInstruction || ins instanceof RangeBasedReIndexInstruction || ins instanceof CumulativeSplitInstruction) {
        // all four types are read through UnaryMRInstructionBase to obtain the single input index
        byte input = ((UnaryMRInstructionBase) ins).input;
        MatrixCharacteristics dim = dimensions.get(input);
        if (dim == null)
            throw new DMLRuntimeException("dimension for instruction " + ins + "  is unset!!!");
        // NOTE(review): CumulativeAggregateInstruction is not named in the branch condition above;
        // this check can only fire if it is a subtype of one of the listed types - confirm.
        if (ins instanceof CumulativeAggregateInstruction)
            ((CumulativeAggregateInstruction) ins).setMatrixCharacteristics(dim);
        if (ins instanceof CumulativeSplitInstruction)
            ((CumulativeSplitInstruction) ins).setMatrixCharacteristics(dim);
        ins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dim.getRowsPerBlock(), dim.getColsPerBlock());
    } else if (ins instanceof ReorgInstruction) {
        ReorgInstruction rinst = (ReorgInstruction) ins;
        byte input = rinst.input;
        MatrixCharacteristics dim = dimensions.get(input);
        if (dim == null)
            throw new DMLRuntimeException("dimension for instruction " + ins + "  is unset!!!");
        // the reorg instruction is handed the full input characteristics before it runs
        rinst.setInputMatrixCharacteristics(dim);
        //MMCJMRMapper does not output empty blocks, no need to generate
        rinst.setOutputEmptyBlocks(!(this instanceof MMCJMRMapper));
        ins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dim.getRowsPerBlock(), dim.getColsPerBlock());
    } else if (ins instanceof MatrixReshapeMRInstruction) {
        // reshape needs the characteristics of both its input and its output;
        // the block sizes passed to processInstruction are those of the input
        MatrixReshapeMRInstruction mrins = (MatrixReshapeMRInstruction) ins;
        byte input = mrins.input;
        byte output = mrins.output;
        MatrixCharacteristics dimIn = dimensions.get(input);
        MatrixCharacteristics dimOut = dimensions.get(output);
        if (dimIn == null || dimOut == null)
            throw new DMLRuntimeException("dimension for instruction " + ins + "  is unset!!!");
        mrins.setMatrixCharacteristics(dimIn, dimOut);
        mrins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dimIn.getRowsPerBlock(), dimIn.getColsPerBlock());
    } else if (ins instanceof AppendMInstruction) {
        // block sizes come from the first input
        byte input = ((AppendMInstruction) ins).input1;
        MatrixCharacteristics dim = dimensions.get(input);
        if (dim == null)
            throw new DMLRuntimeException("dimension for instruction " + ins + "  is unset!!!");
        ins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dim.getRowsPerBlock(), dim.getColsPerBlock());
    } else if (ins instanceof BinaryMInstruction || ins instanceof RemoveEmptyMRInstruction) {
        // both types are read through BinaryMRInstructionBase; dimensions are mandatory here,
        // unlike in the generic BinaryMRInstructionBase branch further down
        byte input = ((BinaryMRInstructionBase) ins).input1;
        MatrixCharacteristics dim = dimensions.get(input);
        if (dim == null)
            throw new DMLRuntimeException("dimension for instruction " + ins + "  is unset!!!");
        ins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dim.getRowsPerBlock(), dim.getColsPerBlock());
    } else if (ins instanceof AppendGInstruction) {
        AppendGInstruction arinst = ((AppendGInstruction) ins);
        byte input = arinst.input1;
        MatrixCharacteristics dimIn = dimensions.get(input);
        if (dimIn == null)
            throw new DMLRuntimeException("Dimensions for instruction " + arinst + "  is unset!!!");
        arinst.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dimIn.getRowsPerBlock(), dimIn.getColsPerBlock());
    } else if (ins instanceof UnaryMRInstructionBase) {
        // generic fallback for any remaining unary instruction: dimensions are mandatory
        UnaryMRInstructionBase rinst = (UnaryMRInstructionBase) ins;
        MatrixCharacteristics dimIn = dimensions.get(rinst.input);
        if (dimIn == null)
            throw new DMLRuntimeException("Dimensions for instruction " + rinst + "  is unset!!!");
        rinst.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dimIn.getRowsPerBlock(), dimIn.getColsPerBlock());
    } else if (ins instanceof BinaryMRInstructionBase) {
        // generic fallback for any remaining binary instruction: dimensions are optional,
        // and -1/-1 is passed as the block size when none are registered for input1
        BinaryMRInstructionBase rinst = (BinaryMRInstructionBase) ins;
        MatrixCharacteristics dimIn = dimensions.get(rinst.input1);
        if (//not set for all
        dimIn != null)
            rinst.processInstruction(valueClass, cachedValues, tempValue, zeroInput, dimIn.getRowsPerBlock(), dimIn.getColsPerBlock());
        else
            ins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, -1, -1);
    } else
        // any other instruction type runs without block-size information
        ins.processInstruction(valueClass, cachedValues, tempValue, zeroInput, -1, -1);
//System.out.println(ins.getMRInstructionType()+" in "+time.stop());
}
Also used : BinaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.BinaryMRInstructionBase) AppendGInstruction(org.apache.sysml.runtime.instructions.mr.AppendGInstruction) AggregateUnaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateUnaryInstruction) RangeBasedReIndexInstruction(org.apache.sysml.runtime.instructions.mr.RangeBasedReIndexInstruction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixReshapeMRInstruction(org.apache.sysml.runtime.instructions.mr.MatrixReshapeMRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) AppendMInstruction(org.apache.sysml.runtime.instructions.mr.AppendMInstruction) ZeroOutInstruction(org.apache.sysml.runtime.instructions.mr.ZeroOutInstruction) ReorgInstruction(org.apache.sysml.runtime.instructions.mr.ReorgInstruction) CumulativeAggregateInstruction(org.apache.sysml.runtime.instructions.mr.CumulativeAggregateInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) UnaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase) CumulativeSplitInstruction(org.apache.sysml.runtime.instructions.mr.CumulativeSplitInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)

Example 97 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class MMRJMRMapper method configure.

/**
 * Configures this mapper from the job configuration.
 *
 * <p>For every aggregate-binary instruction in the job, records how many times each operand
 * is repeated: the first input is registered with the number of column blocks of the second
 * input, and the second input with the number of row blocks of the first input. Both input
 * indexes are also added to the respective operand collections.
 *
 * @param job job configuration to read the instructions and matrix characteristics from
 * @throws RuntimeException wrapping the {@code DMLRuntimeException} raised while reading the
 *         aggregate-binary instructions
 */
public void configure(JobConf job) {
    super.configure(job);
    taggedValue = TaggedMatrixValue.createObject(valueClass);

    final AggregateBinaryInstruction[] instructions;
    try {
        instructions = MRJobConfiguration.getAggregateBinaryInstructions(job);
    } catch (DMLRuntimeException cause) {
        throw new RuntimeException(cause);
    }

    for (AggregateBinaryInstruction inst : instructions) {
        // first operand: one repeat per column block of the second operand
        MatrixCharacteristics rightDims = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, inst.input2);
        double colBlocks = Math.ceil((double) rightDims.getCols() / (double) rightDims.getColsPerBlock());
        numRepeats.put(inst.input1, (long) colBlocks);

        // second operand: one repeat per row block of the first operand
        MatrixCharacteristics leftDims = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, inst.input1);
        double rowBlocks = Math.ceil((double) leftDims.getRows() / (double) leftDims.getRowsPerBlock());
        numRepeats.put(inst.input2, (long) rowBlocks);

        aggBinInput1s.add(inst.input1);
        aggBinInput2s.add(inst.input2);
    }
}
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 98 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class CSVWriteReducer method configure.

/**
 * Configures this reducer from the job configuration.
 *
 * <p>Indexes the job's CSV write instructions by output tag, allocates the per-tag state
 * arrays (sized by the largest output tag, so some slots may stay unused), and then fills
 * the slots for every result index: delimiter, sparse flag, column-block layout, and the
 * row range assigned to this reduce task.
 *
 * @param job job configuration to read instructions, task id and matrix characteristics from
 * @throws RuntimeException wrapping any exception raised while reading the CSV write
 *         instructions
 */
@Override
public void configure(JobConf job) {
    super.configure(job);

    // Map each output tag to its CSV write instruction and remember the largest tag.
    final HashMap<Byte, CSVWriteInstruction> writeByOutput = new HashMap<Byte, CSVWriteInstruction>();
    byte highestTag = 0;
    try {
        for (CSVWriteInstruction write : MRJobConfiguration.getCSVWriteInstructions(job)) {
            writeByOutput.put(write.output, write);
            if (highestTag < write.output) {
                highestTag = write.output;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    final int reducerCount = job.getNumReduceTasks();
    final int thisTask = MapReduceTool.getUniqueTaskId(job);

    // Arrays are indexed directly by tag for efficiency; slots without a tag stay unset.
    final int slots = highestTag + 1;
    rowIndexes = new long[slots];
    colIndexes = new long[slots];
    maxRowIndexes = new long[slots];
    minRowIndexes = new long[slots];
    numColBlocks = new long[slots];
    lastBlockNCols = new int[slots];
    colsPerBlock = new int[slots];
    delims = new String[slots];
    sparses = new boolean[slots];
    tagToResultIndex = new int[slots];

    for (int pos = 0; pos < resultIndexes.length; pos++) {
        final byte tag = resultIndexes[pos];
        tagToResultIndex[tag] = pos;

        final CSVWriteInstruction write = writeByOutput.get(tag);
        final MatrixCharacteristics dims = MRJobConfiguration.getMatrixCharacteristicsForInput(job, write.input);
        delims[tag] = write.delim;
        sparses[tag] = write.sparse;

        // column-block layout of the matrix being written
        numColBlocks[tag] = (long) Math.ceil((double) dims.getCols() / (double) dims.getColsPerBlock());
        lastBlockNCols[tag] = (int) (dims.getCols() % dims.getColsPerBlock());
        colsPerBlock[tag] = dims.getColsPerBlock();

        // rows are split evenly across reduce tasks; this task covers [firstRow, maxRow)
        final long rowsPerTask = (long) Math.ceil((double) dims.getRows() / (double) reducerCount);
        final long firstRow = rowsPerTask * thisTask;
        rowIndexes[tag] = firstRow;
        minRowIndexes[tag] = firstRow;
        maxRowIndexes[tag] = Math.min(rowsPerTask * (thisTask + 1), dims.getRows());
        colIndexes[tag] = 0;
    }

    zeroBlock.setData(new MatrixBlock());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) HashMap(java.util.HashMap) CSVWriteInstruction(org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 99 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class GMRReducer method processReducerInstructionsInGMR.

/**
 * Runs the reducer-side mixed instructions for the block at the given indexes.
 *
 * <p>Ternary instructions are executed against the ctable buffer, which is flushed once it
 * grows past {@code GMRCtableBuffer.MAX_BUFFER_SIZE}. Append-R instructions are skipped when
 * the current indexes lie beyond the block range of both inputs in either dimension. All
 * other instructions are delegated to {@code processOneInstruction}. Does nothing when no
 * mixed instructions are configured.
 *
 * @param indexes block indexes currently being processed
 * @throws IOException wrapping any exception raised while processing an instruction
 */
protected void processReducerInstructionsInGMR(MatrixIndexes indexes) throws IOException {
    if (mixed_instructions != null) {
        try {
            for (MRInstruction inst : mixed_instructions) {
                if (inst instanceof TernaryInstruction) {
                    final TernaryInstruction ternary = (TernaryInstruction) inst;
                    final MatrixCharacteristics blockDims = dimensions.get(ternary.input1);
                    ternary.processInstruction(valueClass, cachedValues, zeroInput, _buff.getMapBuffer(), _buff.getBlockBuffer(), blockDims.getRowsPerBlock(), blockDims.getColsPerBlock());
                    // prevent oom for large/many ctables
                    if (_buff.getBufferSize() > GMRCtableBuffer.MAX_BUFFER_SIZE) {
                        _buff.flushBuffer(cachedReporter);
                    }
                    continue;
                }

                if (inst instanceof AppendRInstruction) {
                    final AppendRInstruction append = (AppendRInstruction) inst;
                    final MatrixCharacteristics left = dimensions.get(append.input1);
                    final MatrixCharacteristics right = dimensions.get(append.input2);
                    // number of row / column blocks of each input
                    final long leftRowBlocks = (long) Math.ceil((double) left.getRows() / left.getRowsPerBlock());
                    final long rightRowBlocks = (long) Math.ceil((double) right.getRows() / right.getRowsPerBlock());
                    final long leftColBlocks = (long) Math.ceil((double) left.getCols() / left.getColsPerBlock());
                    final long rightColBlocks = (long) Math.ceil((double) right.getCols() / right.getColsPerBlock());

                    // Execute the instruction only if current indexes fall within the range of input dimensions
                    if (leftRowBlocks < indexes.getRowIndex() && rightRowBlocks < indexes.getRowIndex()) {
                        continue;
                    }
                    if (leftColBlocks < indexes.getColumnIndex() && rightColBlocks < indexes.getColumnIndex()) {
                        continue;
                    }
                }

                processOneInstruction(inst, valueClass, cachedValues, tempValue, zeroInput);
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}
Also used : AppendRInstruction(org.apache.sysml.runtime.instructions.mr.AppendRInstruction) TernaryInstruction(org.apache.sysml.runtime.instructions.mr.TernaryInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) IOException(java.io.IOException) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 100 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class FrameIndexingDistTest method runTestLeftIndexing.

/**
 * Runs one distributed frame indexing test (left indexing when {@code itype} is
 * {@code IXType.LIX}, right indexing otherwise) and compares the DML output frames against
 * the output of the corresponding R script.
 *
 * <p>The runtime platform, the local-Spark flag and the forced left-indexing method are
 * changed for the duration of the DML run and restored/reset in the {@code finally} block.
 *
 * @param et             execution type; {@code SPARK} selects the Spark platform, anything
 *                       else the hybrid platform
 * @param indexingMethod left-indexing method to force, or {@code null} to force none
 * @param schema         value types of the columns of input frame A
 * @param itype          indexing type under test
 * @param bSparse        for right indexing, selects which sparsity is used for input A
 * @throws DMLRuntimeException declared by the signature
 * @throws IOException         declared by the signature
 */
private void runTestLeftIndexing(ExecType et, LeftIndexingOp.LeftIndexingMethod indexingMethod, ValueType[] schema, IXType itype, boolean bSparse) throws DMLRuntimeException, IOException {
    // remember global settings so they can be restored afterwards
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    final RUNTIME_PLATFORM savedPlatform = rtplatform;

    final boolean leftIndexing = (itype == IXType.LIX);
    final HashMap<String, ValueType[]> outputSchema = new HashMap<String, ValueType[]>();
    final TestConfiguration config = getTestConfiguration(leftIndexing ? "FrameLeftIndexing" : "FrameRightIndexing");

    try {
        if (indexingMethod != null) {
            LeftIndexingOp.FORCED_LEFT_INDEXING = indexingMethod;
        }

        rtplatform = (et == ExecType.SPARK) ? RUNTIME_PLATFORM.SPARK : RUNTIME_PLATFORM.HYBRID;
        if (rtplatform == RUNTIME_PLATFORM.SPARK) {
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        }

        config.addVariable("rows", rows);
        config.addVariable("cols", cols);

        // indexing window (1-based, inclusive)
        long rowstart = 816;
        long rowend = 1229;
        long colstart = 109;
        long colend = 1009;
        config.addVariable("rowstart", rowstart);
        config.addVariable("rowend", rowend);
        config.addVariable("colstart", colstart);
        config.addVariable("colend", colend);
        loadTestConfiguration(config);

        // size of the indexing window
        final long rowSpan = rowend - rowstart + 1;
        final long colSpan = colend - colstart + 1;
        // arguments are constructed directly rather than via the test configuration
        final String home = SCRIPT_DIR + TEST_DIR;

        if (leftIndexing) {
            fullDMLScriptName = home + TEST_NAME + ".dml";
            programArgs = new String[] {
                "-args", input("A"),
                Long.toString(rows), Long.toString(cols),
                Long.toString(rowstart), Long.toString(rowend),
                Long.toString(colstart), Long.toString(colend),
                output("AB"), output("AC"), output("AD"),
                input("B"), input("C"), input("D"),
                Long.toString(rowSpan), Long.toString(colSpan),
                Long.toString(cols - colstart + 1)
            };
            fullRScriptName = home + TEST_NAME + ".R";
            rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();

            // generate and write the four input frames A, B, C and D
            double sparsityA = sparsity1;
            double[][] frameA = getRandomMatrix(rows, cols, min, max, sparsityA, 1111);
            writeInputFrameWithMTD("A", frameA, true, schema, OutputInfo.BinaryBlockOutputInfo);

            double sparsityB = sparsity3;
            double[][] frameB = getRandomMatrix((int) rowSpan, (int) colSpan, min, max, sparsityB, 2345);
            ValueType[] windowSchema = Arrays.copyOfRange(schema, (int) colstart - 1, (int) colend);
            writeInputFrameWithMTD("B", frameB, true, windowSchema, OutputInfo.BinaryBlockOutputInfo);

            double sparsityC = sparsity2;
            double[][] frameC = getRandomMatrix((int) (rowend), (int) (cols - colstart + 1), min, max, sparsityC, 3267);
            ValueType[] tailSchema = Arrays.copyOfRange(schema, (int) colstart - 1, (int) cols);
            writeInputFrameWithMTD("C", frameC, true, tailSchema, OutputInfo.BinaryBlockOutputInfo);

            double sparsityD = sparsity4;
            double[][] frameD = getRandomMatrix(rows, (int) colSpan, min, max, sparsityD, 4856);
            writeInputFrameWithMTD("D", frameD, true, windowSchema, OutputInfo.BinaryBlockOutputInfo);

            // no exception expected; -1 as the expected number of jobs
            runTest(true, false, null, -1);

            for (String file : config.getOutputFiles()) {
                outputSchema.put(file, schema);
            }
        } else {
            fullDMLScriptName = home + RTEST_NAME + ".dml";
            programArgs = new String[] {
                "-stats", "-explain", "-args", input("A"),
                Long.toString(rows), Long.toString(cols),
                Long.toString(rowstart), Long.toString(rowend),
                Long.toString(colstart), Long.toString(colend),
                output("B"), output("C"), output("D")
            };
            fullRScriptName = home + RTEST_NAME + ".R";
            rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();

            // generate and write the single input frame A
            double sparsityA = bSparse ? sparsity4 : sparsity2;
            double[][] frameA = getRandomMatrix(rows, cols, min, max, sparsityA, 1111);
            writeInputFrameWithMTD("A", frameA, true, schema, OutputInfo.BinaryBlockOutputInfo);

            // expected schemas of the three outputs: window columns, tail columns, window columns
            ValueType[] windowSchema = new ValueType[(int) colSpan];
            System.arraycopy(schema, (int) (colstart - 1), windowSchema, 0, (int) colSpan);
            outputSchema.put(config.getOutputFiles()[0], windowSchema);

            ValueType[] tailSchema = new ValueType[(int) (cols - colstart + 1)];
            System.arraycopy(schema, (int) (colstart - 1), tailSchema, 0, (int) (cols - colstart + 1));
            outputSchema.put(config.getOutputFiles()[1], tailSchema);

            outputSchema.put(config.getOutputFiles()[2], windowSchema);

            // no exception expected; -1 as the expected number of jobs
            runTest(true, false, null, -1);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
        LeftIndexingOp.FORCED_LEFT_INDEXING = null;
    }

    runRScript(true);

    // compare every DML output frame with the CSV frame written by the R script
    for (String file : config.getOutputFiles()) {
        FrameBlock dmlFrame = readDMLFrameFromHDFS(file, InputInfo.BinaryBlockInputInfo);
        MatrixCharacteristics frameDims = new MatrixCharacteristics(dmlFrame.getNumRows(), dmlFrame.getNumColumns(), -1, -1);
        FrameBlock rFrame = readRFrameFromHDFS(file + ".csv", InputInfo.CSVInputInfo, frameDims);
        ValueType[] expectedSchema = outputSchema.get(file);
        verifyFrameData(dmlFrame, rFrame, expectedSchema);
        System.out.println("File processed is " + file);
    }
}
Also used : HashMap(java.util.HashMap) ValueType(org.apache.sysml.parser.Expression.ValueType) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Aggregations

MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)258 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)87 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)85 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)57 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)52 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)48 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)41 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)36 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)35 CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)34 IOException (java.io.IOException)27 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)24 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)21 ArrayList (java.util.ArrayList)19 ValueType (org.apache.sysml.parser.Expression.ValueType)19 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)16 Path (org.apache.hadoop.fs.Path)13 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)13 Test (org.junit.Test)13 LongWritable (org.apache.hadoop.io.LongWritable)12