Search in sources :

Example 16 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class SparkExecutionContext method toFrameBlock.

/**
 * Collects a distributed frame into one in-memory {@link FrameBlock}.
 *
 * <p>Every pair of the RDD is collected and its block is copied into the output, starting
 * at the row given by the pair's key minus one. Column names and column metadata are taken
 * from the block whose resulting row offset is zero.
 *
 * @param rdd pairs of (one-based start row, frame block)
 * @param schema value types of the output columns; if {@code null}, a schema of
 *        {@code clen} STRING columns is used instead
 * @param rlen row count passed to {@code ensureAllocatedColumns} on the output
 * @param clen number of columns; only used to build the default schema
 * @return the assembled frame block
 * @throws DMLRuntimeException declared by this method (NOTE(review): presumably raised by
 *         the frame block operations - confirm)
 */
public static FrameBlock toFrameBlock(JavaPairRDD<Long, FrameBlock> rdd, ValueType[] schema, int rlen, int clen) throws DMLRuntimeException {
    // only take a timestamp when statistics are enabled
    final long startNanos;
    if (DMLScript.STATISTICS) {
        startNanos = System.nanoTime();
    } else {
        startNanos = 0;
    }

    // fall back to an all-string schema if none was given
    ValueType[] outSchema = schema;
    if (outSchema == null) {
        outSchema = UtilFunctions.nCopies(clen, ValueType.STRING);
    }

    // create and size the output frame
    FrameBlock result = new FrameBlock(outSchema);
    result.ensureAllocatedColumns(rlen);

    // copy each collected block into its row range of the output frame
    for (Tuple2<Long, FrameBlock> entry : rdd.collect()) {
        final int firstRow = (int) (entry._1() - 1);
        final FrameBlock part = entry._2();
        final int lastRow = firstRow + part.getNumRows() - 1;
        final int lastCol = part.getNumColumns() - 1;
        result.copy(firstRow, lastRow, 0, lastCol, part);

        // the block at offset zero supplies the column names and metadata
        if (firstRow == 0) {
            result.setColumnNames(part.getColumnNames());
            result.setColumnMetadata(part.getColumnMetadata());
        }
    }

    if (DMLScript.STATISTICS) {
        Statistics.accSparkCollectTime(System.nanoTime() - startNanos);
        Statistics.incSparkCollectCount(1);
    }
    return result;
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) Tuple2(scala.Tuple2) Checkpoint(org.apache.sysml.lops.Checkpoint)

Example 17 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class MultiReturnParameterizedBuiltinCPInstruction method processInstruction.

/**
 * Encodes the input frame according to a transform specification and publishes both the
 * encoded matrix and the encoder's metadata frame as outputs.
 *
 * <p>The frame is read from {@code input1}, the specification string from {@code input2}.
 * The encoded matrix is bound to output 0 and the metadata frame (carrying the input's
 * column names) to output 1.
 *
 * @param ec execution context used to obtain the inputs and to set the outputs
 * @throws DMLRuntimeException declared by this method (NOTE(review): presumably raised by
 *         the context or encoder calls - confirm)
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    // fetch the input frame and the transform specification
    FrameBlock inputFrame = ec.getFrameInput(input1.getName());
    String transformSpec = ec
        .getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral())
        .getStringValue();
    String[] columnNames = inputFrame.getColumnNames();
    int numCols = inputFrame.getNumColumns();

    // create the encoder for the given specification
    Encoder enc = EncoderFactory.createEncoder(transformSpec, columnNames, numCols, null);

    // encode into a fresh matrix with the dimensions of the input frame
    MatrixBlock target = new MatrixBlock(inputFrame.getNumRows(), numCols, false);
    MatrixBlock encoded = enc.encode(inputFrame, target);

    // collect encoder metadata into a string frame with one column per input column
    FrameBlock metaTemplate = new FrameBlock(inputFrame.getNumColumns(), ValueType.STRING);
    FrameBlock metaFrame = enc.getMetaData(metaTemplate);
    metaFrame.setColumnNames(columnNames);

    // release the input, then bind both outputs
    ec.releaseFrameInput(input1.getName());
    ec.setMatrixOutput(getOutput(0).getName(), encoded);
    ec.setFrameOutput(getOutput(1).getName(), metaFrame);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) Encoder(org.apache.sysml.runtime.transform.encode.Encoder)

Example 18 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameIndexingDistTest method runTestLeftIndexing.

/**
 * Runs one distributed frame indexing test (left or right indexing, depending on
 * {@code itype}) as a DML script and as an R script, then compares every output file of
 * the two runs.
 *
 * <p>The method temporarily overrides the static settings {@code rtplatform},
 * {@code DMLScript.USE_LOCAL_SPARK_CONFIG} and {@code LeftIndexingOp.FORCED_LEFT_INDEXING}.
 * The first two are restored to their previous values in the {@code finally} block;
 * {@code FORCED_LEFT_INDEXING} is reset to {@code null} rather than to its previous value.
 *
 * @param et execution type; {@code ExecType.SPARK} selects the SPARK runtime platform,
 *        anything else selects HYBRID
 * @param indexingMethod left indexing method to force, or {@code null} to force none
 * @param schema value types of the columns of input frame A
 * @param itype {@code IXType.LIX} runs the left indexing script, any other value the
 *        right indexing script
 * @param bSparse only used for right indexing: chooses {@code sparsity4} over
 *        {@code sparsity2} for the generated input
 * @throws DMLRuntimeException declared by this method
 * @throws IOException declared by this method
 */
private void runTestLeftIndexing(ExecType et, LeftIndexingOp.LeftIndexingMethod indexingMethod, ValueType[] schema, IXType itype, boolean bSparse) throws DMLRuntimeException, IOException {
    // remember the static settings that are restored in the finally block
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    RUNTIME_PLATFORM oldRTP = rtplatform;
    TestConfiguration config = null;
    // expected schema per output file, used for the final comparison
    HashMap<String, ValueType[]> outputSchema = new HashMap<String, ValueType[]>();
    if (itype == IXType.LIX)
        config = getTestConfiguration("FrameLeftIndexing");
    else
        config = getTestConfiguration("FrameRightIndexing");
    try {
        if (indexingMethod != null) {
            LeftIndexingOp.FORCED_LEFT_INDEXING = indexingMethod;
        }
        if (et == ExecType.SPARK) {
            rtplatform = RUNTIME_PLATFORM.SPARK;
        } else {
            // rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE;
            rtplatform = RUNTIME_PLATFORM.HYBRID;
        }
        if (rtplatform == RUNTIME_PLATFORM.SPARK)
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        config.addVariable("rows", rows);
        config.addVariable("cols", cols);
        // fixed index range (one-based, inclusive) shared by the DML and R scripts
        long rowstart = 816, rowend = 1229, colstart = 109, /*967*/
        colend = 1009;
        config.addVariable("rowstart", rowstart);
        config.addVariable("rowend", rowend);
        config.addVariable("colstart", colstart);
        config.addVariable("colend", colend);
        loadTestConfiguration(config);
        if (itype == IXType.LIX) {
            /* This is for running the junit test the new way, i.e., construct the arguments directly */
            String LI_HOME = SCRIPT_DIR + TEST_DIR;
            fullDMLScriptName = LI_HOME + TEST_NAME + ".dml";
            programArgs = new String[] { "-args", input("A"), Long.toString(rows), Long.toString(cols), Long.toString(rowstart), Long.toString(rowend), Long.toString(colstart), Long.toString(colend), output("AB"), output("AC"), output("AD"), input("B"), input("C"), input("D"), Long.toString(rowend - rowstart + 1), Long.toString(colend - colstart + 1), Long.toString(cols - colstart + 1) };
            fullRScriptName = LI_HOME + TEST_NAME + ".R";
            rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();
            //initialize the frame data.
            //rand.nextDouble(); 
            double sparsity = sparsity1;
            // A: full input frame (rows x cols) with the complete schema
            double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111);
            writeInputFrameWithMTD("A", A, true, schema, OutputInfo.BinaryBlockOutputInfo);
            //rand.nextDouble();
            sparsity = sparsity3;
            // B: (rowend-rowstart+1) x (colend-colstart+1), schema of columns colstart..colend
            double[][] B = getRandomMatrix((int) (rowend - rowstart + 1), (int) (colend - colstart + 1), min, max, sparsity, 2345);
            ValueType[] lschemaB = Arrays.copyOfRange(schema, (int) colstart - 1, (int) colend);
            writeInputFrameWithMTD("B", B, true, lschemaB, OutputInfo.BinaryBlockOutputInfo);
            //rand.nextDouble();
            sparsity = sparsity2;
            // C: rowend x (cols-colstart+1), schema of columns colstart..cols
            double[][] C = getRandomMatrix((int) (rowend), (int) (cols - colstart + 1), min, max, sparsity, 3267);
            ValueType[] lschemaC = Arrays.copyOfRange(schema, (int) colstart - 1, (int) cols);
            writeInputFrameWithMTD("C", C, true, lschemaC, OutputInfo.BinaryBlockOutputInfo);
            //rand.nextDoublBe();
            sparsity = sparsity4;
            // D: rows x (colend-colstart+1), reuses the schema of B
            double[][] D = getRandomMatrix(rows, (int) (colend - colstart + 1), min, max, sparsity, 4856);
            writeInputFrameWithMTD("D", D, true, lschemaB, OutputInfo.BinaryBlockOutputInfo);
            boolean exceptionExpected = false;
            int expectedNumberOfJobs = -1;
            runTest(true, exceptionExpected, null, expectedNumberOfJobs);
            // every left indexing output is verified against the full input schema
            for (String file : config.getOutputFiles()) outputSchema.put(file, schema);
        } else {
            /* This is for running the junit test the new way, i.e., construct the arguments directly */
            String RI_HOME = SCRIPT_DIR + TEST_DIR;
            fullDMLScriptName = RI_HOME + RTEST_NAME + ".dml";
            programArgs = new String[] { "-stats", "-explain", "-args", input("A"), Long.toString(rows), Long.toString(cols), Long.toString(rowstart), Long.toString(rowend), Long.toString(colstart), Long.toString(colend), output("B"), output("C"), output("D") };
            fullRScriptName = RI_HOME + RTEST_NAME + ".R";
            rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();
            //initialize the frame data.
            double sparsity = bSparse ? sparsity4 : sparsity2;
            double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111);
            writeInputFrameWithMTD("A", A, true, schema, OutputInfo.BinaryBlockOutputInfo);
            // output files 0 and 2 use the schema of columns colstart..colend
            ValueType[] schemaB = new ValueType[(int) (colend - colstart + 1)];
            System.arraycopy(schema, (int) (colstart - 1), schemaB, 0, (int) (colend - colstart + 1));
            outputSchema.put(config.getOutputFiles()[0], schemaB);
            // output file 1 uses the schema of columns colstart..cols
            ValueType[] schemaC = new ValueType[(int) (cols - colstart + 1)];
            System.arraycopy(schema, (int) (colstart - 1), schemaC, 0, (int) (cols - colstart + 1));
            outputSchema.put(config.getOutputFiles()[1], schemaC);
            outputSchema.put(config.getOutputFiles()[2], schemaB);
            boolean exceptionExpected = false;
            int expectedNumberOfJobs = -1;
            runTest(true, exceptionExpected, null, expectedNumberOfJobs);
        }
    } catch (Exception ex) {
        // any failure during setup or the DML run is rethrown unchecked, keeping the cause
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        // restore the static settings; note FORCED_LEFT_INDEXING is cleared, not restored
        rtplatform = oldRTP;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
        LeftIndexingOp.FORCED_LEFT_INDEXING = null;
    }
    // run the R script, then compare each DML output with the matching R output
    runRScript(true);
    for (String file : config.getOutputFiles()) {
        FrameBlock frameBlock = readDMLFrameFromHDFS(file, InputInfo.BinaryBlockInputInfo);
        // the R output is read using the dimensions of the DML output
        MatrixCharacteristics md = new MatrixCharacteristics(frameBlock.getNumRows(), frameBlock.getNumColumns(), -1, -1);
        FrameBlock frameRBlock = readRFrameFromHDFS(file + ".csv", InputInfo.CSVInputInfo, md);
        ValueType[] schemaOut = outputSchema.get(file);
        verifyFrameData(frameBlock, frameRBlock, schemaOut);
        System.out.println("File processed is " + file);
    }
}
Also used : HashMap(java.util.HashMap) ValueType(org.apache.sysml.parser.Expression.ValueType) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Example 19 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameIndexingTest method runFrameIndexingTest.

/**
 * Runs an in-memory frame indexing operation and validates the result against the same
 * operation executed on matrix blocks holding the same values.
 *
 * <p>For {@code IXType.RIX} the range [rl..ru, cl..cu] is sliced out of a random frame; for
 * {@code IXType.LIX} a second random frame is written into that range of the first frame.
 * Row count, column count and every cell of the frame result are compared with the
 * matrix result.
 *
 * @param schema value types of the frame columns; its length defines the number of columns
 * @param itype indexing type to test ({@code IXType.RIX} or {@code IXType.LIX})
 */
private void runFrameIndexingTest(ValueType[] schema, IXType itype) {
    try {
        //data generation
        double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 2412);
        //init data frame 1
        FrameBlock frame1 = new FrameBlock(schema);
        Object[] row1 = new Object[schema.length];
        for (int i = 0; i < rows; i++) {
            // round-trip each value through the column type so that A holds exactly
            // the doubles the frame cells convert back to
            for (int j = 0; j < schema.length; j++) {
                row1[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]);
                A[i][j] = UtilFunctions.objectToDouble(schema[j], row1[j]);
            }
            frame1.appendRow(row1);
        }
        //core indexing operation
        MatrixBlock mbC = null;
        FrameBlock frame3 = null;
        if (itype == IXType.RIX) {
            //matrix indexing
            MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
            mbC = mbA.sliceOperations(rl, ru, cl, cu, new MatrixBlock());
            //frame indexing
            frame3 = frame1.sliceOperations(rl, ru, cl, cu, new FrameBlock());
        } else if (itype == IXType.LIX) {
            //data generation
            double[][] B = getRandomMatrix(ru - rl + 1, cu - cl + 1, -10, 10, 0.9, 7);
            //init data frame 2 with the schema of the target column range
            ValueType[] lschema2 = new ValueType[cu - cl + 1];
            for (int j = cl; j <= cu; j++) {
                lschema2[j - cl] = schema[j];
            }
            FrameBlock frame2 = new FrameBlock(lschema2);
            Object[] row2 = new Object[lschema2.length];
            for (int i = 0; i < ru - rl + 1; i++) {
                // same type round-trip as for A
                for (int j = 0; j < lschema2.length; j++) {
                    row2[j] = UtilFunctions.doubleToObject(lschema2[j], B[i][j]);
                    B[i][j] = UtilFunctions.objectToDouble(lschema2[j], row2[j]);
                }
                frame2.appendRow(row2);
            }
            //matrix indexing
            MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
            MatrixBlock mbB = DataConverter.convertToMatrixBlock(B);
            mbC = mbA.leftIndexingOperations(mbB, rl, ru, cl, cu, new MatrixBlock(), UpdateType.COPY);
            //frame indexing
            frame3 = frame1.leftIndexingOperations(frame2, rl, ru, cl, cu, new FrameBlock());
        } else {
            // fail with a clear message instead of a NullPointerException further down
            throw new IllegalArgumentException("Unsupported indexing type: " + itype);
        }
        //check basic meta data
        if (frame3.getNumRows() != mbC.getNumRows())
            Assert.fail("Wrong number of rows: " + frame3.getNumRows() + ", expected: " + mbC.getNumRows());
        if (frame3.getNumColumns() != mbC.getNumColumns())
            Assert.fail("Wrong number of columns: " + frame3.getNumColumns() + ", expected: " + mbC.getNumColumns());
        //check correct values for all rows of the result; for left indexing the result
        //has more rows than the indexed range, so the rows outside it are checked as well
        ValueType[] lschema = frame3.getSchema();
        final int numRows = frame3.getNumRows();
        for (int i = 0; i < numRows; i++) {
            for (int j = 0; j < lschema.length; j++) {
                double tmp = UtilFunctions.objectToDouble(lschema[j], frame3.get(i, j));
                if (tmp != mbC.quickGetValue(i, j))
                    Assert.fail("Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + mbC.quickGetValue(i, j));
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) ValueType(org.apache.sysml.parser.Expression.ValueType)

Example 20 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameMatrixCastingTest method readMatrixOrFrameInput.

/**
 * Reads a binary-block input file and returns its content as a {@code double[][]}.
 *
 * <p>If {@code dt} is {@code DataType.FRAME}, the file is read with a frame reader and the
 * frame is converted to a matrix block; otherwise it is read with a matrix reader, using
 * the configured block size for both block dimensions.
 *
 * @param fname name of the file to read
 * @param rows number of rows passed to the reader
 * @param cols number of columns passed to the reader
 * @param dt data type of the input; selects the frame or the matrix reader
 * @return the content converted via {@code DataConverter.convertToDoubleMatrix}
 * @throws DMLRuntimeException declared by this method
 * @throws IOException declared by this method
 */
private double[][] readMatrixOrFrameInput(String fname, int rows, int cols, DataType dt) throws DMLRuntimeException, IOException {
    final MatrixBlock data;
    if (dt != DataType.FRAME) {
        // matrix input: square blocks of the configured size, last argument -1 as before
        final int blockSize = ConfigurationManager.getBlocksize();
        final MatrixReader matrixReader = MatrixReaderFactory.createMatrixReader(InputInfo.BinaryBlockInputInfo);
        data = matrixReader.readMatrixFromHDFS(fname, rows, cols, blockSize, blockSize, -1);
    } else {
        // frame input: read as frame, then convert to a matrix block
        final FrameReader frameReader = FrameReaderFactory.createFrameReader(InputInfo.BinaryBlockInputInfo);
        final FrameBlock frame = frameReader.readFrameFromHDFS(fname, rows, cols);
        data = DataConverter.convertToMatrixBlock(frame);
    }
    return DataConverter.convertToDoubleMatrix(data);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader) MatrixReader(org.apache.sysml.runtime.io.MatrixReader)

Aggregations

FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 82, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 31, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 23, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 22, ValueType (org.apache.sysml.parser.Expression.ValueType): 21, IOException (java.io.IOException): 17, FrameReader (org.apache.sysml.runtime.io.FrameReader): 17, RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM): 14, FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 12, LongWritable (org.apache.hadoop.io.LongWritable): 10, JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 10, CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties): 10, FrameWriter (org.apache.sysml.runtime.io.FrameWriter): 9, TestConfiguration (org.apache.sysml.test.integration.TestConfiguration): 8, SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 7, MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData): 6, Text (org.apache.hadoop.io.Text): 5, ArrayList (java.util.ArrayList): 4, FileSystem (org.apache.hadoop.fs.FileSystem): 4, Path (org.apache.hadoop.fs.Path): 4