Search in sources :

Example 26 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class FullGroupedAggregateTest method runGroupedAggregateOperationTest.

/* TODO weighted central moment in R
	@Test
	public void testGroupedAggMoment4DenseWeightsMR() 
	{
		runGroupedAggregateOperationTest(OpType.MOMENT4, false, true, false, ExecType.MR);
	}
	
	@Test
	public void testGroupedAggMoment4SparseWeightsMR() 
	{
		runGroupedAggregateOperationTest(OpType.MOMENT4, true, true, false, ExecType.MR);
	}
	*/
/**
 * Runs one grouped-aggregate scenario: generates the inputs, executes the DML
 * script and the R script, and compares the two result matrices.
 *
 * The global {@code rtplatform} and {@code DMLScript.USE_LOCAL_SPARK_CONFIG}
 * settings are overridden for the run and restored in the {@code finally} block.
 * An {@link IOException} raised inside the run is printed and rethrown wrapped
 * in a {@link RuntimeException}.
 *
 * @param type      operation under test; its ordinal is passed as an argument
 *                  to both the DML and the R script
 * @param sparse    selects {@code sparsity1} when true, {@code sparsity2} otherwise,
 *                  as the sparsity of input A
 * @param weights   when true, {@code TEST_NAME2} is run with an additional input C
 *                  and output "D" is compared; otherwise {@code TEST_NAME1} is run
 *                  and output "C" is compared
 * @param transpose when true, input A is generated with {@code cols} rows and
 *                  {@code rows} columns instead of {@code rows} x {@code cols}
 * @param instType  MR maps to HADOOP, SPARK to SPARK, anything else to HYBRID
 */
private void runGroupedAggregateOperationTest(OpType type, boolean sparse, boolean weights, boolean transpose, ExecType instType) {
    // remember the global platform so it can be put back afterwards
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(instType) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        // script name, script argument and input sparsity for this scenario
        final String testName;
        if (weights) {
            testName = TEST_NAME2;
        } else {
            testName = TEST_NAME1;
        }
        final int functionId = type.ordinal();
        final double inputSparsity;
        if (sparse) {
            inputSparsity = sparsity1;
        } else {
            inputSparsity = sparsity2;
        }
        final String cacheDir = TEST_CACHE_ENABLED
            ? (testName + functionId + "_" + inputSparsity + "_" + transpose + "/")
            : "";
        loadTestConfiguration(getTestConfiguration(testName), cacheDir);

        // construct the script arguments directly
        final String scriptBase = SCRIPT_DIR + TEST_DIR + testName;
        fullDMLScriptName = scriptBase + ".dml";
        if (weights) {
            programArgs = new String[] { "-args", input("A"), input("B"), input("C"), Integer.toString(functionId), output("D") };
        } else {
            programArgs = new String[] { "-explain", "-args", input("A"), input("B"), Integer.toString(functionId), output("C") };
        }
        fullRScriptName = scriptBase + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + functionId + " " + expectedDir();

        // input A: dimensions are swapped when transpose is requested
        final int aRows = transpose ? cols : rows;
        final int aCols = transpose ? rows : cols;
        double[][] dataA = getRandomMatrix(aRows, aCols, -0.05, 1, inputSparsity, 7);
        writeInputMatrix("A", dataA, true);
        MatrixCharacteristics metaA = new MatrixCharacteristics(aRows, aCols, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("A.mtd"), ValueType.DOUBLE, metaA, OutputInfo.TextCellOutputInfo);

        // input B: rounded values drawn from [1, numGroups]
        double[][] dataB = TestUtils.round(getRandomMatrix(rows, cols, 1, numGroups, 1.0, 3));
        writeInputMatrix("B", dataB, true);
        MatrixCharacteristics metaB = new MatrixCharacteristics(rows, cols, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("B.mtd"), ValueType.DOUBLE, metaB, OutputInfo.TextCellOutputInfo);

        if (weights) {
            // currently we use integer weights due to our definition of weight as multiplicity
            double[][] dataC = TestUtils.round(getRandomMatrix(rows, cols, 1, maxWeight, 1.0, 3));
            writeInputMatrix("C", dataC, true);
            MatrixCharacteristics metaC = new MatrixCharacteristics(rows, cols, 1000, 1000);
            MapReduceTool.writeMetaDataFile(input("C.mtd"), ValueType.DOUBLE, metaC, OutputInfo.TextCellOutputInfo);
        }

        // execute both implementations
        runTest(true, false, null, -1);
        runRScript(true);

        // compare the DML result against the R result
        final String resultName = weights ? "D" : "C";
        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS(resultName);
        HashMap<CellIndex, Double> rResult = readRMatrixFromFS(resultName);
        TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw new RuntimeException(ioe);
    } finally {
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
Also used : TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)

Example 27 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class FullRowAggregateTest method runRowAggregateOperationTest.

/**
 * Runs one row-aggregate scenario: generates input A, executes the DML script
 * and the R script selected by {@code type}, and compares output "B" of both.
 *
 * The global {@code rtplatform}, {@code DMLScript.USE_LOCAL_SPARK_CONFIG} and
 * {@code OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION} settings are overridden
 * for the run and restored in the {@code finally} block.
 *
 * @param type        row aggregate under test; selects one of TEST_NAME1..TEST_NAME6
 * @param sparse      selects {@code sparsity1} when true, {@code sparsity2} otherwise
 * @param vector      selects {@code rows1} when true, {@code rows2} otherwise, as
 *                    the number of input rows
 * @param instType    MR maps to HADOOP, SPARK to SPARK, anything else to HYBRID
 * @param specialData for ROW_INDEXMAX / ROW_INDEXMIN, switches the value range of
 *                    the generated data (all negative resp. all positive)
 * @param rewrites    value assigned to ALLOW_ALGEBRAIC_SIMPLIFICATION for the run
 */
private void runRowAggregateOperationTest(OpType type, boolean sparse, boolean vector, ExecType instType, boolean specialData, boolean rewrites) {
    // remember global settings; all of them are restored in finally
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(instType) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    final boolean savedRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
    try {
        // pick the script that belongs to the requested aggregate
        String testName = null;
        switch(type) {
            case ROW_SUMS:
                testName = TEST_NAME1;
                break;
            case ROW_MEANS:
                testName = TEST_NAME2;
                break;
            case ROW_MAX:
                testName = TEST_NAME3;
                break;
            case ROW_MIN:
                testName = TEST_NAME4;
                break;
            case ROW_INDEXMAX:
                testName = TEST_NAME5;
                break;
            case ROW_INDEXMIN:
                testName = TEST_NAME6;
                break;
        }
        final int numRows;
        if (vector) {
            numRows = rows1;
        } else {
            numRows = rows2;
        }
        final double inputSparsity;
        if (sparse) {
            inputSparsity = sparsity1;
        } else {
            inputSparsity = sparsity2;
        }
        TestConfiguration testConfig = getTestConfiguration(testName);

        // value range of the generated data; [-0.05, 1] unless overridden below
        double lower = -0.05;
        double upper = 1;
        if (specialData && type == OpType.ROW_INDEXMAX) {
            // special data: negative, 0 is actual max
            lower = -1;
            upper = -0.05;
        } else if (type == OpType.ROW_INDEXMIN) {
            if (specialData) {
                // special data: positive, 0 is actual min
                lower = 0.05;
                upper = 1;
            } else {
                lower = -1;
                upper = 0.05;
            }
        }

        final String cacheDir = TEST_CACHE_ENABLED
            ? (type.ordinal() + "_" + numRows + "_" + specialData + "_" + inputSparsity + "/")
            : "";
        loadTestConfiguration(testConfig, cacheDir);

        // construct the script arguments directly
        final String scriptBase = SCRIPT_DIR + TEST_DIR + testName;
        fullDMLScriptName = scriptBase + ".dml";
        programArgs = new String[] { "-explain", "-args", input("A"), Integer.toString(numRows), Integer.toString(cols), output("B") };
        fullRScriptName = scriptBase + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // generate and write the input
        double[][] dataA = getRandomMatrix(numRows, cols, lower, upper, inputSparsity, 7);
        writeInputMatrix("A", dataA, true);

        // execute both implementations; no exception is expected
        runTest(true, false, null, -1);
        runRScript(true);

        // compare the DML result against the R result
        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS("B");
        HashMap<CellIndex, Double> rResult = readRMatrixFromFS("B");
        TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");
    } finally {
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = savedRewrites;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration)

Example 28 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class RowVariancesTest method testRowVariances.

/**
 * Test the row variances function, "rowVars(X)", on
 * dense/sparse matrices/vectors on the CP/Spark/MR platforms.
 *
 * Besides comparing the DML output with the R output, the heavy-hitter
 * opcodes recorded by {@code Statistics} are inspected when rewrites are
 * enabled on CP or Spark. The global settings changed here are restored in
 * the {@code finally} block.
 *
 * @param testName The name of this test case.
 * @param sparsity Selection between empty, sparse, and dense data.
 * @param dataType Selection between a matrix, a row vector, and a
 *                 column vector.
 * @param rewrites Whether or not to employ algebraic rewrites.
 * @param platform Selection between CP/Spark/MR platforms.
 */
private void testRowVariances(String testName, Sparsity sparsity, DataType dataType, boolean rewrites, ExecType platform) {
    // Override global settings; the previous values are kept for the finally block
    final boolean savedRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(platform) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        // Load the configuration and set up script paths and arguments
        getAndLoadTestConfiguration(testName);
        final String scriptBase = SCRIPT_DIR + TEST_DIR + testName;
        fullDMLScriptName = scriptBase + ".dml";
        programArgs = new String[] { "-explain", "-stats", "-args", input(INPUT_NAME), output(OUTPUT_NAME) };
        fullRScriptName = scriptBase + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // Sparsity of the generated data: dense unless EMPTY or SPARSE is requested
        double density = sparsityDense;
        switch(sparsity) {
            case EMPTY:
                density = 0;
                break;
            case SPARSE:
                density = sparsitySparse;
                break;
            default:
                break;
        }

        // Dimensions of the generated data: full matrix unless a vector is requested
        int numRows = rows;
        int numCols = cols;
        switch(dataType) {
            case ROWVECTOR:
                numRows = 1;
                break;
            case COLUMNVECTOR:
                numCols = 1;
                break;
            default:
                break;
        }

        // Generate and write the input
        double[][] data = getRandomMatrix(numRows, numCols, -1, 1, density, 7);
        writeInputMatrixWithMTD(INPUT_NAME, data, true);

        // Run DML and R scripts
        runTest(true, false, null, -1);
        runRScript(true);

        // Compare output matrices
        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS(OUTPUT_NAME);
        HashMap<CellIndex, Double> rResult = readRMatrixFromFS(OUTPUT_NAME);
        TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");

        // With rewrites enabled on CP or Spark, check the recorded opcodes:
        // a row vector input must show the varOp opcode, a column vector
        // input must not show the rowVarOp opcode.
        final boolean onSpark = (platform == ExecType.SPARK);
        final boolean checkOpcodes = rewrites && (onSpark || platform == ExecType.CP);
        if (checkOpcodes) {
            final String opcodePrefix = onSpark ? Instruction.SP_INST_PREFIX : "";
            if (dataType == DataType.ROWVECTOR) {
                boolean varOpSeen = Statistics.getCPHeavyHitterOpCodes().contains(opcodePrefix + varOp);
                Assert.assertTrue("Rewrite not applied to row vector case.", varOpSeen);
            } else if (dataType == DataType.COLUMNVECTOR) {
                boolean rowVarOpSeen = Statistics.getCPHeavyHitterOpCodes().contains(opcodePrefix + rowVarOp);
                Assert.assertTrue("Rewrite not applied to column vector case.", !rowVarOpSeen);
            }
        }
    } finally {
        // Restore global settings
        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = savedRewrites;
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)

Example 29 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class StdDevTest method testStdDev.

/**
 * Test the standard deviation function, "sd(X)", on
 * dense/sparse matrices/vectors on the CP/Spark/MR platforms.
 *
 * The DML output is compared with the R output. The global platform and
 * Spark-config settings changed here are restored in the {@code finally} block.
 *
 * @param testName The name of this test case.
 * @param sparsity Selection between empty, sparse, and dense data.
 * @param dataType Selection between a matrix, a row vector, and a
 *                 column vector.
 * @param platform Selection between CP/Spark/MR platforms.
 */
private void testStdDev(String testName, Sparsity sparsity, DataType dataType, ExecType platform) {
    // Override global settings; the previous values are kept for the finally block
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(platform) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        // Load the configuration and set up script paths and arguments
        getAndLoadTestConfiguration(testName);
        final String scriptBase = SCRIPT_DIR + TEST_DIR + testName;
        fullDMLScriptName = scriptBase + ".dml";
        programArgs = new String[] { "-explain", "-stats", "-args", input(INPUT_NAME), output(OUTPUT_NAME) };
        fullRScriptName = scriptBase + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // Sparsity of the generated data: dense unless EMPTY or SPARSE is requested
        double density = sparsityDense;
        switch(sparsity) {
            case EMPTY:
                density = 0;
                break;
            case SPARSE:
                density = sparsitySparse;
                break;
            default:
                break;
        }

        // Dimensions of the generated data: full matrix unless a vector is requested
        int numRows = rows;
        int numCols = cols;
        switch(dataType) {
            case ROWVECTOR:
                numRows = 1;
                break;
            case COLUMNVECTOR:
                numCols = 1;
                break;
            default:
                break;
        }

        // Generate and write the input
        double[][] data = getRandomMatrix(numRows, numCols, -1, 1, density, 7);
        writeInputMatrixWithMTD(INPUT_NAME, data, true);

        // Run DML and R scripts
        runTest(true, false, null, -1);
        runRScript(true);

        // Compare output matrices
        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS(OUTPUT_NAME);
        HashMap<CellIndex, Double> rResult = readRMatrixFromFS(OUTPUT_NAME);
        TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");
    } finally {
        // Restore global settings
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)

Example 30 with CellIndex

use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.

the class AppendMatrixTest method commonAppendTest.

/**
 * Runs the append script in DML and in R on two randomly generated inputs
 * and compares every configured output file. Also asserts the number of
 * executed MR jobs (2 on HADOOP, 0 otherwise).
 *
 * The random inputs are seeded from the current time, and the sparsity of
 * the second input B is itself drawn at random. The platform and Spark-config
 * settings are restored in the {@code finally} block;
 * {@code BinaryOp.FORCED_APPEND_METHOD} is reset to {@code null} there rather
 * than to its previous value.
 *
 * @param platform           runtime platform to execute on
 * @param rows               number of rows of both inputs
 * @param cols1              number of columns of input A
 * @param cols2              number of columns of input B
 * @param sparse             selects {@code sparsity2} when true, {@code sparsity1}
 *                           otherwise, as the sparsity of input A
 * @param forcedAppendMethod append method to force via
 *                           {@code BinaryOp.FORCED_APPEND_METHOD}; ignored when null
 */
public void commonAppendTest(RUNTIME_PLATFORM platform, int rows, int cols1, int cols2, boolean sparse, AppendMethod forcedAppendMethod) {
    TestConfiguration testConfig = getAndLoadTestConfiguration(TEST_NAME);
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    double density;
    if (sparse) {
        density = sparsity2;
    } else {
        density = sparsity1;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    try {
        if (forcedAppendMethod != null) {
            BinaryOp.FORCED_APPEND_METHOD = forcedAppendMethod;
        }
        rtplatform = platform;
        if (rtplatform == RUNTIME_PLATFORM.SPARK) {
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        }
        testConfig.addVariable("rows", rows);
        testConfig.addVariable("cols", cols1);

        // construct the script arguments directly
        final String scriptBase = SCRIPT_DIR + TEST_DIR + TEST_NAME;
        fullDMLScriptName = scriptBase + ".dml";
        programArgs = new String[] { "-args", input("A"), Long.toString(rows), Long.toString(cols1), input("B"), Long.toString(cols2), output("C") };
        fullRScriptName = scriptBase + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // input A uses the requested sparsity, input B a randomly drawn one
        Random sparsityGenerator = new Random(System.currentTimeMillis());
        double[][] dataA = getRandomMatrix(rows, cols1, min, max, density, System.currentTimeMillis());
        writeInputMatrix("A", dataA, true);
        density = sparsityGenerator.nextDouble();
        double[][] dataB = getRandomMatrix(rows, cols2, min, max, density, System.currentTimeMillis());
        writeInputMatrix("B", dataB, true);

        // expected MR job counts depend on whether we run on HADOOP
        final int expectedCompiledJobs;
        final int expectedExecutedJobs;
        if (rtplatform == RUNTIME_PLATFORM.HADOOP) {
            expectedCompiledJobs = 2;
            expectedExecutedJobs = 2;
        } else {
            expectedCompiledJobs = 1;
            expectedExecutedJobs = 0;
        }

        // execute both implementations; no exception is expected
        runTest(true, false, null, expectedCompiledJobs);
        runRScript(true);
        Assert.assertEquals("Wrong number of executed MR jobs.", expectedExecutedJobs, Statistics.getNoOfExecutedMRJobs());

        // compare each configured output of DML against R
        for (String outputName : testConfig.getOutputFiles()) {
            HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS(outputName);
            HashMap<CellIndex, Double> rResult = readRMatrixFromFS(outputName);
            TestUtils.compareMatrices(dmlResult, rResult, epsilon, outputName + "-DML", outputName + "-R");
        }
    } finally {
        // reset execution platform and forced append method
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
        BinaryOp.FORCED_APPEND_METHOD = null;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) Random(java.util.Random) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration)

Aggregations

CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)257 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)201 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)173 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)37 Test (org.junit.Test)20 HashMap (java.util.HashMap)16 IOException (java.io.IOException)14 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)9 Random (java.util.Random)8 ArrayList (java.util.ArrayList)7 BufferedReader (java.io.BufferedReader)6 StringTokenizer (java.util.StringTokenizer)6 InputStreamReader (java.io.InputStreamReader)5 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)5 FileSystem (org.apache.hadoop.fs.FileSystem)5 Path (org.apache.hadoop.fs.Path)5 FileStatus (org.apache.hadoop.fs.FileStatus)4 FileReader (java.io.FileReader)2 DMLException (org.apache.sysml.api.DMLException)2 MMultMethod (org.apache.sysml.hops.AggBinaryOp.MMultMethod)2