Search in sources :

Example 1 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class FullGroupedAggregateMatrixTest method runGroupedAggregateOperationTest.

/**
 * Executes one grouped-aggregate run of the script pair named {@code testname}
 * and, when no failure is expected, compares the DML result against R.
 *
 * @param testname base name of the .dml/.R scripts; any name other than
 *                 TEST_NAME1 is expected to fail with a DMLException
 * @param type     aggregate operation; its ordinal is handed to both scripts
 * @param sparse   picks sparsity1 when true, sparsity2 otherwise
 * @param instType MR selects HADOOP, SPARK selects SPARK, anything else HYBRID
 * @param numCols  column count of the generated input matrix A
 */
@SuppressWarnings("rawtypes")
private void runGroupedAggregateOperationTest(String testname, OpType type, boolean sparse, ExecType instType, int numCols) {
    // Remember the global platform so the finally block can restore it.
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(instType) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        // Resolve script name, function code and data density.
        final String scriptName = testname;
        final int fnCode = type.ordinal();
        final double density;
        if (sparse) {
            density = sparsity1;
        } else {
            density = sparsity2;
        }
        String cacheDir = "";
        if (TEST_CACHE_ENABLED) {
            cacheDir = scriptName + fnCode + "_" + density + "_" + numCols + "/";
        }
        // Only TEST_NAME1 is expected to run through without an exception.
        final boolean expectFailure = !scriptName.equals(TEST_NAME1);
        TestConfiguration testConfig = getTestConfiguration(scriptName);
        loadTestConfiguration(testConfig, cacheDir);

        // Build the DML and R invocations directly.
        final String scriptHome = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptHome + scriptName + ".dml";
        programArgs = new String[] {
            "-explain", "-args",
            input("A"), input("B"),
            String.valueOf(fnCode), String.valueOf(numGroups),
            output("C")
        };
        fullRScriptName = scriptHome + scriptName + ".R";
        rCmd = "Rscript " + fullRScriptName + " " + inputDir() + " " + fnCode + " " + expectedDir();

        // Input A: random data matrix plus its metadata file.
        double[][] dataMatrix = getRandomMatrix(rows, numCols, -0.05, 1, density, 7);
        writeInputMatrix("A", dataMatrix, true);
        MatrixCharacteristics dataMeta = new MatrixCharacteristics(rows, numCols, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("A.mtd"), ValueType.DOUBLE, dataMeta, OutputInfo.TextCellOutputInfo);

        // Input B: rounded group ids in [1, numGroups] as a single column.
        double[][] groupIds = TestUtils.round(getRandomMatrix(rows, 1, 1, numGroups, 1.0, 3));
        writeInputMatrix("B", groupIds, true);
        MatrixCharacteristics groupMeta = new MatrixCharacteristics(rows, 1, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("B.mtd"), ValueType.DOUBLE, groupMeta, OutputInfo.TextCellOutputInfo);

        // Run the DML script, expecting a DMLException where applicable.
        Class expectedException = null;
        if (expectFailure) {
            expectedException = DMLException.class;
        }
        runTest(true, expectFailure, expectedException, -1);

        if (!expectFailure) {
            // Produce the R reference result and compare both outputs.
            runRScript(true);
            HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS("C");
            HashMap<CellIndex, Double> rResult = readRMatrixFromFS("C");
            TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");
            // The written metadata must describe a numGroups x numCols result.
            checkDMLMetaDataFile("C", new MatrixCharacteristics(numGroups, numCols, 1, 1));
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw new RuntimeException(ioe);
    } finally {
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
Also used : DMLException(org.apache.sysml.api.DMLException) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex) AfterClass(org.junit.AfterClass) BeforeClass(org.junit.BeforeClass)

Example 2 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class FullGroupedAggregateTest method runGroupedAggregateOperationTest.

/* TODO weighted central moment in R
	@Test
	public void testGroupedAggMoment4DenseWeightsMR() 
	{
		runGroupedAggregateOperationTest(OpType.MOMENT4, false, true, false, ExecType.MR);
	}
	
	@Test
	public void testGroupedAggMoment4SparseWeightsMR() 
	{
		runGroupedAggregateOperationTest(OpType.MOMENT4, true, true, false, ExecType.MR);
	}
	*/
/**
 * Runs the grouped-aggregate DML script and its R counterpart for one
 * configuration and compares the two result matrices.
 *
 * @param type      aggregate operation; its ordinal is handed to both scripts
 * @param sparse    picks sparsity1 when true, sparsity2 otherwise
 * @param weights   when true, uses TEST_NAME2 with an additional weights input C
 *                  and output D; otherwise TEST_NAME1 with output C
 * @param transpose when true, input A is generated with rows and cols swapped
 * @param instType  MR selects HADOOP, SPARK selects SPARK, anything else HYBRID
 */
private void runGroupedAggregateOperationTest(OpType type, boolean sparse, boolean weights, boolean transpose, ExecType instType) {
    // Remember the global platform so the finally block can restore it.
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(instType) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        // Resolve script name, function code and data density.
        final String scriptName;
        if (weights) {
            scriptName = TEST_NAME2;
        } else {
            scriptName = TEST_NAME1;
        }
        final int fnCode = type.ordinal();
        final double density;
        if (sparse) {
            density = sparsity1;
        } else {
            density = sparsity2;
        }
        final String cacheDir = TEST_CACHE_ENABLED
            ? scriptName + fnCode + "_" + density + "_" + transpose + "/"
            : "";
        TestConfiguration testConfig = getTestConfiguration(scriptName);
        loadTestConfiguration(testConfig, cacheDir);

        // Build the DML and R invocations directly.
        final String scriptHome = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptHome + scriptName + ".dml";
        if (weights) {
            programArgs = new String[] { "-args", input("A"), input("B"), input("C"), Integer.toString(fnCode), output("D") };
        } else {
            programArgs = new String[] { "-explain", "-args", input("A"), input("B"), Integer.toString(fnCode), output("C") };
        }
        fullRScriptName = scriptHome + scriptName + ".R";
        rCmd = "Rscript " + fullRScriptName + " " + inputDir() + " " + fnCode + " " + expectedDir();

        // Input A: random data, dimensions swapped when transpose is set.
        double[][] dataMatrix = getRandomMatrix(transpose ? cols : rows, transpose ? rows : cols, -0.05, 1, density, 7);
        writeInputMatrix("A", dataMatrix, true);
        MatrixCharacteristics dataMeta = new MatrixCharacteristics(transpose ? cols : rows, transpose ? rows : cols, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("A.mtd"), ValueType.DOUBLE, dataMeta, OutputInfo.TextCellOutputInfo);

        // Input B: rounded group ids in [1, numGroups].
        double[][] groupIds = TestUtils.round(getRandomMatrix(rows, cols, 1, numGroups, 1.0, 3));
        writeInputMatrix("B", groupIds, true);
        MatrixCharacteristics groupMeta = new MatrixCharacteristics(rows, cols, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("B.mtd"), ValueType.DOUBLE, groupMeta, OutputInfo.TextCellOutputInfo);

        if (weights) {
            // Input C: integer weights, since a weight is defined as a multiplicity.
            double[][] weightValues = TestUtils.round(getRandomMatrix(rows, cols, 1, maxWeight, 1.0, 3));
            writeInputMatrix("C", weightValues, true);
            MatrixCharacteristics weightMeta = new MatrixCharacteristics(rows, cols, 1000, 1000);
            MapReduceTool.writeMetaDataFile(input("C.mtd"), ValueType.DOUBLE, weightMeta, OutputInfo.TextCellOutputInfo);
        }

        // Execute DML and R, then compare the result matrices.
        runTest(true, false, null, -1);
        runRScript(true);
        final String resultName = weights ? "D" : "C";
        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS(resultName);
        HashMap<CellIndex, Double> rResult = readRMatrixFromFS(resultName);
        TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw new RuntimeException(ioe);
    } finally {
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
Also used : TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)

Example 3 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class AggregateInfTest method runInfAggregateOperationTest.

/**
 * Runs the aggregate script on a random matrix that contains one infinite
 * entry and expects the single output cell (1,1) to equal that infinity.
 *
 * @param pos      true to inject positive infinity, false for negative infinity
 * @param sparse   picks sparsity1 when true, sparsity2 otherwise
 * @param instType MR selects HADOOP; any other value selects HYBRID
 */
private void runInfAggregateOperationTest(boolean pos, boolean sparse, ExecType instType) {
    // Remember the global platform so the finally block can restore it.
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    if (instType == ExecType.MR) {
        rtplatform = RUNTIME_PLATFORM.HADOOP;
    } else {
        rtplatform = RUNTIME_PLATFORM.HYBRID;
    }
    try {
        final double density;
        if (sparse) {
            density = sparsity1;
        } else {
            density = sparsity2;
        }
        getAndLoadTestConfiguration(TEST_NAME);

        // Build the DML and R invocations directly.
        final String scriptHome = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptHome + TEST_NAME + ".dml";
        programArgs = new String[] { "-args", input("A"), output("B") };
        fullRScriptName = scriptHome + TEST_NAME + ".R";
        rCmd = "Rscript " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // Random input with a single infinite value planted at [7][7].
        final double infinity;
        if (pos) {
            infinity = Double.POSITIVE_INFINITY;
        } else {
            infinity = Double.NEGATIVE_INFINITY;
        }
        double[][] input = getRandomMatrix(rows, cols, -0.05, 1, density, 7);
        input[7][7] = infinity;
        writeInputMatrixWithMTD("A", input, false);

        // Only the DML script is executed; the expectation is built by hand.
        runTest(true, false, null, -1);

        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS("B");
        HashMap<CellIndex, Double> expected = new HashMap<CellIndex, Double>();
        expected.put(new CellIndex(1, 1), infinity);
        TestUtils.compareMatrices(dmlResult, expected, eps, "Stat-DML", "Stat-R");
    } finally {
        rtplatform = savedPlatform;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex) HashMap(java.util.HashMap)

Example 4 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class ColSumsSqTest method testColSumsSquared.

/**
 * Runs the "colSums(X^2)" script for one combination of data layout,
 * rewrite setting and execution platform, and compares DML against R.
 *
 * @param testName The name of this test case (base name of the scripts).
 * @param sparse Picks sparsity2 when true, sparsity1 otherwise.
 * @param vector True for a single-column input, false for a matrix with cols columns.
 * @param rewrites Whether algebraic simplification rewrites are enabled.
 * @param platform MR selects HADOOP, SPARK selects SPARK, anything else SINGLE_NODE.
 */
private void testColSumsSquared(String testName, boolean sparse, boolean vector, boolean rewrites, ExecType platform) {
    // Save the global settings this test overrides; restored in finally.
    final boolean savedRewriteFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(platform) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        // Load the configuration and assemble the DML and R invocations.
        getAndLoadTestConfiguration(testName);
        final String scriptHome = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptHome + testName + ".dml";
        programArgs = new String[] { "-explain", "-stats", "-args", input(INPUT_NAME), output(OUTPUT_NAME) };
        fullRScriptName = scriptHome + testName + ".R";
        rCmd = "Rscript " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // Generate the input: one column for the vector case, cols otherwise.
        final double density;
        if (sparse) {
            density = sparsity2;
        } else {
            density = sparsity1;
        }
        int numColumns = cols;
        if (vector) {
            numColumns = 1;
        }
        double[][] input = getRandomMatrix(rows, numColumns, -1, 1, density, 7);
        writeInputMatrixWithMTD(INPUT_NAME, input, true);

        // Execute both scripts and compare their outputs.
        runTest(true, false, null, -1);
        runRScript(true);
        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS(OUTPUT_NAME);
        HashMap<CellIndex, Double> rResult = readRMatrixFromFS(OUTPUT_NAME);
        TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");

        // With rewrites enabled on CP or Spark, the opcode must show up in the
        // heavy-hitter statistics for matrices and must not for vectors.
        final boolean statsPlatform = platform == ExecType.SPARK || platform == ExecType.CP;
        if (rewrites && statsPlatform) {
            String opcodePrefix = "";
            if (platform == ExecType.SPARK) {
                opcodePrefix = Instruction.SP_INST_PREFIX;
            }
            final String expectedOpcode = opcodePrefix + op;
            final boolean opcodeSeen = Statistics.getCPHeavyHitterOpCodes().contains(expectedOpcode);
            if (vector) {
                Assert.assertFalse("Rewrite applied to vector case.", opcodeSeen);
            } else {
                Assert.assertTrue("Rewrite not applied to matrix case.", opcodeSeen);
            }
        }
    } finally {
        // Restore every overridden global setting.
        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = savedRewriteFlag;
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)

Example 5 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class FullRowAggregateTest method runRowAggregateOperationTest.

/**
 * Runs one row-aggregate DML script and its R counterpart and compares the
 * two result matrices.
 *
 * @param type        row aggregate to test; selects one of TEST_NAME1..TEST_NAME6
 * @param sparse      picks sparsity1 when true, sparsity2 otherwise
 * @param vector      picks rows1 as the row count when true, rows2 otherwise
 * @param instType    MR selects HADOOP, SPARK selects SPARK, anything else HYBRID
 * @param specialData only affects ROW_INDEXMAX / ROW_INDEXMIN: when true the data
 *                    is all negative (index-max) or all positive (index-min)
 * @param rewrites    whether algebraic simplification rewrites are enabled
 * @throws IllegalArgumentException if {@code type} is not one of the six
 *                                  supported row aggregates
 */
private void runRowAggregateOperationTest(OpType type, boolean sparse, boolean vector, ExecType instType, boolean specialData, boolean rewrites) {
    // Save the global settings this test overrides; restored in finally.
    RUNTIME_PLATFORM platformOld = rtplatform;
    switch(instType) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    boolean oldRewritesFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
    try {
        // Map the aggregate type to its script name.
        String TEST_NAME = null;
        switch(type) {
            case ROW_SUMS:
                TEST_NAME = TEST_NAME1;
                break;
            case ROW_MEANS:
                TEST_NAME = TEST_NAME2;
                break;
            case ROW_MAX:
                TEST_NAME = TEST_NAME3;
                break;
            case ROW_MIN:
                TEST_NAME = TEST_NAME4;
                break;
            case ROW_INDEXMAX:
                TEST_NAME = TEST_NAME5;
                break;
            case ROW_INDEXMIN:
                TEST_NAME = TEST_NAME6;
                break;
            default:
                // Fail fast instead of continuing with a null script name.
                throw new IllegalArgumentException("Unsupported row aggregate type: " + type);
        }
        int rows = (vector) ? rows1 : rows2;
        double sparsity = (sparse) ? sparsity1 : sparsity2;
        TestConfiguration config = getTestConfiguration(TEST_NAME);
        //generate actual dataset
        double min, max;
        // For ROW_INDEXMAX with special data, generate all negative data;
        // for ROW_INDEXMIN with special data, generate all positive data.
        if (type == OpType.ROW_INDEXMAX) {
            //special data: negative, 0 is actual max
            min = specialData ? -1 : -0.05;
            max = specialData ? -0.05 : 1;
        } else if (type == OpType.ROW_INDEXMIN) {
            //special data: positive, 0 is actual min
            min = specialData ? 0.05 : -1;
            max = specialData ? 1 : 0.05;
        } else {
            min = -0.05;
            max = 1;
        }
        String TEST_CACHE_DIR = "";
        if (TEST_CACHE_ENABLED) {
            TEST_CACHE_DIR = type.ordinal() + "_" + rows + "_" + specialData + "_" + sparsity + "/";
        }
        loadTestConfiguration(config, TEST_CACHE_DIR);
        /* This is for running the junit test the new way, i.e., construct the arguments directly */
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME + ".dml";
        programArgs = new String[] { "-explain", "-args", input("A"), Integer.toString(rows), Integer.toString(cols), output("B") };
        fullRScriptName = HOME + TEST_NAME + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();
        double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 7);
        writeInputMatrix("A", A, true);
        // No exception is expected from the DML run.
        runTest(true, false, null, -1);
        runRScript(true);
        //compare matrices 
        HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B");
        HashMap<CellIndex, Double> rfile = readRMatrixFromFS("B");
        TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
    } finally {
        rtplatform = platformOld;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewritesFlag;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration)

Aggregations

CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex): 219, TestConfiguration (org.apache.sysml.test.integration.TestConfiguration): 169, RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM): 141, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 34, Test (org.junit.Test): 20, HashMap (java.util.HashMap): 16, IOException (java.io.IOException): 14, Random (java.util.Random): 9, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 8, ArrayList (java.util.ArrayList): 7, BufferedReader (java.io.BufferedReader): 6, StringTokenizer (java.util.StringTokenizer): 6, InputStreamReader (java.io.InputStreamReader): 5, FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 5, FileSystem (org.apache.hadoop.fs.FileSystem): 5, Path (org.apache.hadoop.fs.Path): 5, FileStatus (org.apache.hadoop.fs.FileStatus): 4, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 3, FileReader (java.io.FileReader): 2, DMLException (org.apache.sysml.api.DMLException): 2