Search in sources :

Example 21 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class DataConverter method convertToMatrixBlock.

/**
 * Builds a matrix block from a frame block of any schema.
 * Matrix blocks only hold double values, so cells of other value types
 * are converted on a best-effort basis; non-numerical data may therefore
 * lead to errors.
 *
 * @param frame frame block to convert
 * @return matrix block with the same number of rows and columns as the frame
 */
public static MatrixBlock convertToMatrixBlock(FrameBlock frame) {
    final int numRows = frame.getNumRows();
    final int numCols = frame.getNumColumns();
    MatrixBlock result = new MatrixBlock(numRows, numCols, false);
    result.allocateDenseBlock();
    ValueType[] colTypes = frame.getSchema();
    boolean allDouble = UtilFunctions.frequency(colTypes, ValueType.DOUBLE) == colTypes.length;
    if (!allDouble) {
        // general case: convert cell by cell according to the column type
        for (int r = 0; r < frame.getNumRows(); r++) {
            for (int c = 0; c < frame.getNumColumns(); c++) {
                double value = UtilFunctions.objectToDouble(colTypes[c], frame.get(r, c));
                result.appendValue(r, c, value);
            }
        }
    } else {
        // pure double schema: copy straight out of the column arrays
        // (no per-cell objects) into the row-major dense array
        double[][] columns = new double[numCols][];
        double[] dense = result.getDenseBlockValues();
        for (int c = 0; c < numCols; c++) {
            columns[c] = (double[]) frame.getColumnData(c);
        }
        // copy in 16x16 tiles so the touched parts of the source columns
        // and of the target array are revisited while still cached
        final int tile = 16;
        for (int rowLo = 0; rowLo < numRows; rowLo += tile) {
            for (int colLo = 0; colLo < numCols; colLo += tile) {
                int rowHi = Math.min(rowLo + tile, numRows);
                int colHi = Math.min(colLo + tile, numCols);
                for (int r = rowLo; r < rowHi; r++) {
                    // offset of row r in the row-major target array
                    int rowOffset = r * numCols;
                    for (int c = colLo; c < colHi; c++) {
                        dense[rowOffset + c] = columns[c][r];
                    }
                }
            }
        }
    }
    // post-processing
    result.examSparsity();
    return result;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ValueType(org.apache.sysml.parser.Expression.ValueType)

Example 22 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameCastingTest method runFrameCastingTest.

/**
 * Generates random data, sends it through one of the frame/matrix
 * conversion paths of DataConverter and verifies that the resulting
 * frame has the expected dimensions and cell values.
 *
 * @param schema value types of the frame columns; its length is the number of generated columns
 * @param ctype conversion path under test (F2M, M2F_G or M2F_S)
 */
private void runFrameCastingTest(ValueType[] schema, CastType ctype) {
    try {
        // data generation
        double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 2412);
        // round-trip every cell through its column type so that A holds the
        // values expected after the schema conversion
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < schema.length; j++) {
                A[i][j] = UtilFunctions.objectToDouble(schema[j], UtilFunctions.doubleToObject(schema[j], A[i][j]));
            }
        }
        // core casting operations
        FrameBlock frame = null;
        if (ctype == CastType.F2M) {
            // construct input schema
            FrameBlock frame1 = new FrameBlock(schema);
            Object[] row1 = new Object[schema.length];
            for (int i = 0; i < rows; i++) {
                for (int j = 0; j < schema.length; j++) {
                    row1[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]);
                }
                frame1.appendRow(row1);
            }
            MatrixBlock mb = DataConverter.convertToMatrixBlock(frame1);
            frame = DataConverter.convertToFrameBlock(mb);
        } else if (ctype == CastType.M2F_G) {
            MatrixBlock mb = DataConverter.convertToMatrixBlock(A);
            frame = DataConverter.convertToFrameBlock(mb);
        } else if (ctype == CastType.M2F_S) {
            MatrixBlock mb = DataConverter.convertToMatrixBlock(A);
            frame = DataConverter.convertToFrameBlock(mb, schema);
        }
        // fail with a clear message instead of a NullPointerException when
        // no conversion path produced a frame (e.g. unhandled cast type)
        if (frame == null) {
            Assert.fail("No frame produced for cast type: " + ctype);
        }
        // check basic meta data
        if (frame.getNumRows() != rows) {
            Assert.fail("Wrong number of rows: " + frame.getNumRows() + ", expected: " + rows);
        }
        // the value check below indexes A by the result's column count, so
        // a mismatch has to be reported before it
        if (frame.getNumColumns() != schema.length) {
            Assert.fail("Wrong number of columns: " + frame.getNumColumns() + ", expected: " + schema.length);
        }
        // check correct values
        ValueType[] lschema = frame.getSchema();
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < lschema.length; j++) {
                double tmp = UtilFunctions.objectToDouble(lschema[j], frame.get(i, j));
                if (tmp != A[i][j]) {
                    Assert.fail("Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + A[i][j]);
                }
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) ValueType(org.apache.sysml.parser.Expression.ValueType)

Example 23 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameIndexingDistTest method runTestLeftIndexing.

/**
 * Runs a distributed frame indexing test: writes random frame inputs,
 * executes the DML script (left indexing for IXType.LIX, right indexing
 * otherwise), runs the R reference script, and compares every output
 * frame against the R result.
 *
 * The global settings rtplatform, DMLScript.USE_LOCAL_SPARK_CONFIG and
 * LeftIndexingOp.FORCED_LEFT_INDEXING are modified for the run; the first
 * two are restored and the last is reset to null in the finally block.
 *
 * @param et execution type; SPARK selects the Spark platform, anything else HYBRID
 * @param indexingMethod left indexing method to force, or null to leave the setting untouched
 * @param schema value types of the columns of input frame A
 * @param itype indexing type; LIX runs the left indexing script, any other value the right indexing script
 * @param bSparse only read in the right indexing branch, where it selects sparsity4 instead of sparsity2 for A
 * @throws IOException declared by the signature; presumably raised by the frame readers used for verification (exceptions inside the try block are wrapped in RuntimeException)
 */
private void runTestLeftIndexing(ExecType et, LeftIndexingOp.LeftIndexingMethod indexingMethod, ValueType[] schema, IXType itype, boolean bSparse) throws IOException {
    // remember global settings so they can be restored in the finally block
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    RUNTIME_PLATFORM oldRTP = rtplatform;
    TestConfiguration config = null;
    // expected schema per output file, used when verifying the results
    HashMap<String, ValueType[]> outputSchema = new HashMap<String, ValueType[]>();
    if (itype == IXType.LIX)
        config = getTestConfiguration("FrameLeftIndexing");
    else
        config = getTestConfiguration("FrameRightIndexing");
    try {
        if (indexingMethod != null) {
            LeftIndexingOp.FORCED_LEFT_INDEXING = indexingMethod;
        }
        if (et == ExecType.SPARK) {
            rtplatform = RUNTIME_PLATFORM.SPARK;
        } else {
            // rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE;
            rtplatform = RUNTIME_PLATFORM.HYBRID;
        }
        if (rtplatform == RUNTIME_PLATFORM.SPARK)
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        config.addVariable("rows", rows);
        config.addVariable("cols", cols);
        // fixed index range used by both the DML and the R script
        long rowstart = 816, rowend = 1229, colstart = 109, /*967*/
        colend = 1009;
        config.addVariable("rowstart", rowstart);
        config.addVariable("rowend", rowend);
        config.addVariable("colstart", colstart);
        config.addVariable("colend", colend);
        loadTestConfiguration(config);
        if (itype == IXType.LIX) {
            /* This is for running the junit test the new way, i.e., construct the arguments directly */
            String LI_HOME = SCRIPT_DIR + TEST_DIR;
            fullDMLScriptName = LI_HOME + TEST_NAME + ".dml";
            programArgs = new String[] { "-args", input("A"), Long.toString(rows), Long.toString(cols), Long.toString(rowstart), Long.toString(rowend), Long.toString(colstart), Long.toString(colend), output("AB"), output("AC"), output("AD"), input("B"), input("C"), input("D"), Long.toString(rowend - rowstart + 1), Long.toString(colend - colstart + 1), Long.toString(cols - colstart + 1) };
            fullRScriptName = LI_HOME + TEST_NAME + ".R";
            rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();
            // initialize the frame data.
            // A: full-size target frame
            double sparsity = sparsity1;
            double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111);
            writeInputFrameWithMTD("A", A, true, schema, OutputInfo.BinaryBlockOutputInfo);
            // B: (rowend-rowstart+1) x (colend-colstart+1) frame
            sparsity = sparsity3;
            double[][] B = getRandomMatrix((int) (rowend - rowstart + 1), (int) (colend - colstart + 1), min, max, sparsity, 2345);
            // schema slice for columns colstart..colend (presumably 1-based, inclusive indexes)
            ValueType[] lschemaB = Arrays.copyOfRange(schema, (int) colstart - 1, (int) colend);
            writeInputFrameWithMTD("B", B, true, lschemaB, OutputInfo.BinaryBlockOutputInfo);
            // C: rowend x (cols-colstart+1) frame
            sparsity = sparsity2;
            double[][] C = getRandomMatrix((int) (rowend), (int) (cols - colstart + 1), min, max, sparsity, 3267);
            // schema slice for columns colstart..cols
            ValueType[] lschemaC = Arrays.copyOfRange(schema, (int) colstart - 1, (int) cols);
            writeInputFrameWithMTD("C", C, true, lschemaC, OutputInfo.BinaryBlockOutputInfo);
            // D: rows x (colend-colstart+1) frame, shares the column schema of B
            sparsity = sparsity4;
            double[][] D = getRandomMatrix(rows, (int) (colend - colstart + 1), min, max, sparsity, 4856);
            writeInputFrameWithMTD("D", D, true, lschemaB, OutputInfo.BinaryBlockOutputInfo);
            boolean exceptionExpected = false;
            int expectedNumberOfJobs = -1;
            runTest(true, exceptionExpected, null, expectedNumberOfJobs);
            // every left indexing output is verified against the full schema of A
            for (String file : config.getOutputFiles()) outputSchema.put(file, schema);
        } else {
            /* This is for running the junit test the new way, i.e., construct the arguments directly */
            String RI_HOME = SCRIPT_DIR + TEST_DIR;
            fullDMLScriptName = RI_HOME + RTEST_NAME + ".dml";
            programArgs = new String[] { "-stats", "-explain", "-args", input("A"), Long.toString(rows), Long.toString(cols), Long.toString(rowstart), Long.toString(rowend), Long.toString(colstart), Long.toString(colend), output("B"), output("C"), output("D") };
            fullRScriptName = RI_HOME + RTEST_NAME + ".R";
            rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();
            // initialize the frame data.
            double sparsity = bSparse ? sparsity4 : sparsity2;
            double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111);
            writeInputFrameWithMTD("A", A, true, schema, OutputInfo.BinaryBlockOutputInfo);
            // first and third output use the schema slice colstart..colend
            ValueType[] schemaB = new ValueType[(int) (colend - colstart + 1)];
            System.arraycopy(schema, (int) (colstart - 1), schemaB, 0, (int) (colend - colstart + 1));
            outputSchema.put(config.getOutputFiles()[0], schemaB);
            // second output uses the schema slice colstart..cols
            ValueType[] schemaC = new ValueType[(int) (cols - colstart + 1)];
            System.arraycopy(schema, (int) (colstart - 1), schemaC, 0, (int) (cols - colstart + 1));
            outputSchema.put(config.getOutputFiles()[1], schemaC);
            outputSchema.put(config.getOutputFiles()[2], schemaB);
            boolean exceptionExpected = false;
            int expectedNumberOfJobs = -1;
            runTest(true, exceptionExpected, null, expectedNumberOfJobs);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        // restore global settings regardless of the test outcome
        rtplatform = oldRTP;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
        LeftIndexingOp.FORCED_LEFT_INDEXING = null;
    }
    // produce the reference results with R, then compare each output file
    runRScript(true);
    for (String file : config.getOutputFiles()) {
        FrameBlock frameBlock = readDMLFrameFromHDFS(file, InputInfo.BinaryBlockInputInfo);
        // the R result is read with the dimensions of the DML result
        MatrixCharacteristics md = new MatrixCharacteristics(frameBlock.getNumRows(), frameBlock.getNumColumns(), -1, -1);
        FrameBlock frameRBlock = readRFrameFromHDFS(file + ".csv", InputInfo.CSVInputInfo, md);
        ValueType[] schemaOut = outputSchema.get(file);
        verifyFrameData(frameBlock, frameRBlock, schemaOut);
        System.out.println("File processed is " + file);
    }
}
Also used : HashMap(java.util.HashMap) ValueType(org.apache.sysml.parser.Expression.ValueType) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Example 24 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameIndexingTest method runFrameIndexingTest.

/**
 * Applies the same right or left indexing operation to a frame block and
 * to a matrix block built from identical data, and verifies that the
 * frame result matches the matrix result in dimensions and cell values.
 *
 * @param schema value types of the frame columns; its length is the number of generated columns
 * @param itype indexing operation under test (RIX for right indexing, LIX for left indexing)
 */
private void runFrameIndexingTest(ValueType[] schema, IXType itype) {
    try {
        // data generation
        double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 2412);
        // init data frame 1; A is overwritten with the values obtained after
        // the round trip through the column type so both sides hold equal data
        FrameBlock frame1 = new FrameBlock(schema);
        Object[] row1 = new Object[schema.length];
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < schema.length; j++) {
                row1[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]);
                A[i][j] = UtilFunctions.objectToDouble(schema[j], row1[j]);
            }
            frame1.appendRow(row1);
        }
        // core indexing operation
        MatrixBlock mbC = null;
        FrameBlock frame3 = null;
        if (itype == IXType.RIX) {
            // matrix indexing
            MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
            mbC = mbA.slice(rl, ru, cl, cu, new MatrixBlock());
            // frame indexing
            frame3 = frame1.slice(rl, ru, cl, cu, new FrameBlock());
        } else if (itype == IXType.LIX) {
            // data generation
            double[][] B = getRandomMatrix(ru - rl + 1, cu - cl + 1, -10, 10, 0.9, 7);
            // init data frame 2 with the schema of the target column range
            ValueType[] lschema2 = new ValueType[cu - cl + 1];
            for (int j = cl; j <= cu; j++) {
                lschema2[j - cl] = schema[j];
            }
            FrameBlock frame2 = new FrameBlock(lschema2);
            Object[] row2 = new Object[lschema2.length];
            for (int i = 0; i < ru - rl + 1; i++) {
                for (int j = 0; j < lschema2.length; j++) {
                    row2[j] = UtilFunctions.doubleToObject(lschema2[j], B[i][j]);
                    B[i][j] = UtilFunctions.objectToDouble(lschema2[j], row2[j]);
                }
                frame2.appendRow(row2);
            }
            // matrix indexing
            MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
            MatrixBlock mbB = DataConverter.convertToMatrixBlock(B);
            mbC = mbA.leftIndexingOperations(mbB, rl, ru, cl, cu, new MatrixBlock(), UpdateType.COPY);
            // frame indexing
            frame3 = frame1.leftIndexingOperations(frame2, rl, ru, cl, cu, new FrameBlock());
        }
        // fail with a clear message instead of a NullPointerException when
        // no result was produced (e.g. unhandled indexing type)
        if (frame3 == null || mbC == null) {
            Assert.fail("No result produced for indexing type: " + itype);
        }
        // check basic meta data
        if (frame3.getNumRows() != mbC.getNumRows()) {
            Assert.fail("Wrong number of rows: " + frame3.getNumRows() + ", expected: " + mbC.getNumRows());
        }
        if (frame3.getNumColumns() != mbC.getNumColumns()) {
            Assert.fail("Wrong number of columns: " + frame3.getNumColumns() + ", expected: " + mbC.getNumColumns());
        }
        // check correct values over the whole result; the row count is taken
        // from the result because left indexing returns more than
        // ru - rl + 1 rows whenever the target has more rows than the range
        ValueType[] lschema = frame3.getSchema();
        int numRows = mbC.getNumRows();
        for (int i = 0; i < numRows; i++) {
            for (int j = 0; j < lschema.length; j++) {
                double tmp = UtilFunctions.objectToDouble(lschema[j], frame3.get(i, j));
                if (tmp != mbC.quickGetValue(i, j)) {
                    Assert.fail("Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + mbC.quickGetValue(i, j));
                }
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) ValueType(org.apache.sysml.parser.Expression.ValueType)

Example 25 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameMatrixCastingTest method runFrameCastingTest.

/**
 * Runs one frame/matrix casting script on generated data and checks that
 * the script output equals the input.
 *
 * @param testname name of the test configuration and of the DML script (without extension)
 * @param multColBlks if true the input has cols2 columns, otherwise cols1
 * @param vt value type of the input; only used when testname equals TEST_NAME1, otherwise DOUBLE is written
 * @param et execution type that selects the runtime platform (MR, SPARK, anything else runs HYBRID)
 */
private void runFrameCastingTest(String testname, boolean multColBlks, ValueType vt, ExecType et) {
    // remember the global settings overridden below; restored in finally
    final RUNTIME_PLATFORM previousPlatform = rtplatform;
    final RUNTIME_PLATFORM requestedPlatform;
    switch (et) {
        case MR:
            requestedPlatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            requestedPlatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            requestedPlatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    rtplatform = requestedPlatform;
    final boolean previousSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        int numCols = cols1;
        if (multColBlks) {
            numCols = cols2;
        }
        TestConfiguration testConfig = getTestConfiguration(testname);
        loadTestConfiguration(testConfig);
        String scriptHome = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptHome + testname + ".dml";
        programArgs = new String[] { "-explain", "-args", input("A"), output("B") };
        // data generation
        double[][] expected = getRandomMatrix(rows, numCols, -1, 1, 0.9, 7);
        // TEST_NAME1 reads a frame and writes a matrix; every other test
        // reads a double matrix and writes a frame
        final boolean frameInput = testname.equals(TEST_NAME1);
        DataType inputDataType;
        ValueType inputValueType;
        DataType outputDataType;
        if (frameInput) {
            inputDataType = DataType.FRAME;
            inputValueType = vt;
            outputDataType = DataType.MATRIX;
        } else {
            inputDataType = DataType.MATRIX;
            inputValueType = ValueType.DOUBLE;
            outputDataType = DataType.FRAME;
        }
        writeMatrixOrFrameInput(input("A"), expected, rows, numCols, inputDataType, inputValueType);
        // run testcase
        runTest(true, false, null, -1);
        // compare matrices (tolerance 0)
        double[][] actual = readMatrixOrFrameInput(output("B"), rows, numCols, outputDataType);
        TestUtils.compareMatrices(expected, actual, rows, numCols, 0);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        rtplatform = previousPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = previousSparkConfig;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) ValueType(org.apache.sysml.parser.Expression.ValueType) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) DataType(org.apache.sysml.parser.Expression.DataType) IOException(java.io.IOException)

Aggregations

ValueType (org.apache.sysml.parser.Expression.ValueType)55 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)23 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)19 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)18 DataType (org.apache.sysml.parser.Expression.DataType)11 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)10 IOException (java.io.IOException)9 LongWritable (org.apache.hadoop.io.LongWritable)7 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)7 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)7 ArrayList (java.util.ArrayList)6 Text (org.apache.hadoop.io.Text)6 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)6 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)5 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)5 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)5 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)5 Row (org.apache.spark.sql.Row)4 StructType (org.apache.spark.sql.types.StructType)4 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)4