use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.
the class FullGroupedAggregateTest method runGroupedAggregateOperationTest.
/* TODO weighted central moment in R
@Test
public void testGroupedAggMoment4DenseWeightsMR()
{
runGroupedAggregateOperationTest(OpType.MOMENT4, false, true, false, ExecType.MR);
}
@Test
public void testGroupedAggMoment4SparseWeightsMR()
{
runGroupedAggregateOperationTest(OpType.MOMENT4, true, true, false, ExecType.MR);
}
*/
/**
 * Runs a single grouped-aggregate test case: generates the input matrices,
 * executes the DML script and the corresponding R script, and compares the
 * two results cell by cell.
 *
 * @param type      aggregate operation under test; its ordinal is passed to
 *                  both scripts as the function id
 * @param sparse    if true, matrix A is generated with {@code sparsity1},
 *                  otherwise with {@code sparsity2}
 * @param weights   if true, the weighted script ({@code TEST_NAME2}) is run
 *                  with an additional weight input C and output D; otherwise
 *                  {@code TEST_NAME1} is run with output C
 * @param transpose if true, A is generated with rows and columns swapped
 * @param instType  execution type used to select the runtime platform
 * @throws RuntimeException wrapping any {@link IOException} raised inside the
 *                  test body
 */
private void runGroupedAggregateOperationTest(OpType type, boolean sparse, boolean weights, boolean transpose, ExecType instType) {
    // select the runtime platform for the requested execution type
    RUNTIME_PLATFORM platformOld = rtplatform;
    switch (instType) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    try {
        // determine script and function name
        String TEST_NAME = weights ? TEST_NAME2 : TEST_NAME1;
        int fn = type.ordinal();
        double sparsity = (sparse) ? sparsity1 : sparsity2;
        String TEST_CACHE_DIR = "";
        if (TEST_CACHE_ENABLED) {
            TEST_CACHE_DIR = TEST_NAME + fn + "_" + sparsity + "_" + transpose + "/";
        }
        TestConfiguration config = getTestConfiguration(TEST_NAME);
        loadTestConfiguration(config, TEST_CACHE_DIR);

        // This is for running the junit test the new way, i.e., construct the arguments directly
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME + ".dml";
        if (!weights) {
            programArgs = new String[] { "-explain", "-args", input("A"), input("B"), Integer.toString(fn), output("C") };
        } else {
            programArgs = new String[] { "-args", input("A"), input("B"), input("C"), Integer.toString(fn), output("D") };
        }
        fullRScriptName = HOME + TEST_NAME + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // generate actual dataset (A is the only input affected by the transpose flag)
        double[][] A = getRandomMatrix(transpose ? cols : rows, transpose ? rows : cols, -0.05, 1, sparsity, 7);
        writeInputMatrix("A", A, true);
        MatrixCharacteristics mc1 = new MatrixCharacteristics(transpose ? cols : rows, transpose ? rows : cols, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("A.mtd"), ValueType.DOUBLE, mc1, OutputInfo.TextCellOutputInfo);

        // group ids: rounded values drawn from [1, numGroups]
        double[][] B = TestUtils.round(getRandomMatrix(rows, cols, 1, numGroups, 1.0, 3));
        writeInputMatrix("B", B, true);
        MatrixCharacteristics mc2 = new MatrixCharacteristics(rows, cols, 1000, 1000);
        MapReduceTool.writeMetaDataFile(input("B.mtd"), ValueType.DOUBLE, mc2, OutputInfo.TextCellOutputInfo);

        if (weights) {
            // currently we use integer weights due to our definition of weight as multiplicity
            double[][] C = TestUtils.round(getRandomMatrix(rows, cols, 1, maxWeight, 1.0, 3));
            writeInputMatrix("C", C, true);
            MatrixCharacteristics mc3 = new MatrixCharacteristics(rows, cols, 1000, 1000);
            MapReduceTool.writeMetaDataFile(input("C.mtd"), ValueType.DOUBLE, mc3, OutputInfo.TextCellOutputInfo);
        }

        // run tests
        runTest(true, false, null, -1);
        runRScript(true);

        // compare matrices
        HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS(weights ? "D" : "C");
        HashMap<CellIndex, Double> rfile = readRMatrixFromFS(weights ? "D" : "C");
        TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
    } catch (IOException ex) {
        // the rethrown exception carries the cause, so the stack trace is not printed separately
        throw new RuntimeException(ex);
    } finally {
        // restore the global settings changed above
        rtplatform = platformOld;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.
the class FullRowAggregateTest method runRowAggregateOperationTest.
/**
 * Runs a single row-aggregate test case: generates the input matrix, executes
 * the DML script and the corresponding R script, and compares the two results
 * cell by cell.
 *
 * @param type        row aggregate under test; selects the script name
 * @param sparse      if true, the input is generated with {@code sparsity1},
 *                    otherwise with {@code sparsity2}
 * @param vector      if true, {@code rows1} is used as the row count,
 *                    otherwise {@code rows2}
 * @param instType    execution type used to select the runtime platform
 * @param specialData only relevant for ROW_INDEXMAX / ROW_INDEXMIN: switches
 *                    the generated value range to all-negative respectively
 *                    all-positive bounds
 * @param rewrites    value assigned to
 *                    {@code OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION}
 *                    for the duration of the test
 * @throws IllegalArgumentException if {@code type} is not one of the
 *                    supported row aggregates
 */
private void runRowAggregateOperationTest(OpType type, boolean sparse, boolean vector, ExecType instType, boolean specialData, boolean rewrites) {
    RUNTIME_PLATFORM platformOld = rtplatform;
    switch (instType) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    boolean oldRewritesFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
    try {
        // map the aggregate type to its script name
        String TEST_NAME;
        switch (type) {
            case ROW_SUMS:
                TEST_NAME = TEST_NAME1;
                break;
            case ROW_MEANS:
                TEST_NAME = TEST_NAME2;
                break;
            case ROW_MAX:
                TEST_NAME = TEST_NAME3;
                break;
            case ROW_MIN:
                TEST_NAME = TEST_NAME4;
                break;
            case ROW_INDEXMAX:
                TEST_NAME = TEST_NAME5;
                break;
            case ROW_INDEXMIN:
                TEST_NAME = TEST_NAME6;
                break;
            default:
                // fail fast instead of continuing with a null script name
                throw new IllegalArgumentException("Unsupported row aggregate type: " + type);
        }
        int rows = (vector) ? rows1 : rows2;
        double sparsity = (sparse) ? sparsity1 : sparsity2;
        TestConfiguration config = getTestConfiguration(TEST_NAME);

        // value range of the generated data; the index aggregates use a
        // one-sided range when specialData is set
        double min, max;
        if (type == OpType.ROW_INDEXMAX) {
            // special data: negative, 0 is actual max
            min = specialData ? -1 : -0.05;
            max = specialData ? -0.05 : 1;
        } else if (type == OpType.ROW_INDEXMIN) {
            // special data: positive, 0 is actual min
            min = specialData ? 0.05 : -1;
            max = specialData ? 1 : 0.05;
        } else {
            min = -0.05;
            max = 1;
        }

        String TEST_CACHE_DIR = "";
        if (TEST_CACHE_ENABLED) {
            TEST_CACHE_DIR = type.ordinal() + "_" + rows + "_" + specialData + "_" + sparsity + "/";
        }
        loadTestConfiguration(config, TEST_CACHE_DIR);

        /* This is for running the junit test the new way, i.e., construct the arguments directly */
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME + ".dml";
        programArgs = new String[] { "-explain", "-args", input("A"), Integer.toString(rows), Integer.toString(cols), output("B") };
        fullRScriptName = HOME + TEST_NAME + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // generate actual dataset
        double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 7);
        writeInputMatrix("A", A, true);

        boolean exceptionExpected = false;
        runTest(true, exceptionExpected, null, -1);
        runRScript(true);

        // compare matrices
        HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B");
        HashMap<CellIndex, Double> rfile = readRMatrixFromFS("B");
        TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
    } finally {
        // restore the global settings changed above
        rtplatform = platformOld;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewritesFlag;
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.
the class RowVariancesTest method testRowVariances.
/**
 * Runs the row variances function, "rowVars(X)", through the DML script and
 * the R reference script named by {@code testName}, compares the outputs, and
 * (for CP/Spark with rewrites enabled) inspects the recorded heavy-hitter
 * opcodes to check that the expected rewrite took place.
 *
 * @param testName The name of this test case (also the script base name).
 * @param sparsity Selection between empty, sparse, and dense data.
 * @param dataType Selection between a matrix, a row vector, and a
 *                 column vector.
 * @param rewrites Whether or not to employ algebraic rewrites.
 * @param platform Selection between CP/Spark/MR platforms.
 */
private void testRowVariances(String testName, Sparsity sparsity, DataType dataType, boolean rewrites, ExecType platform) {
    // Remember and override the rewrite flag
    final boolean savedRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;

    // Remember and override the runtime platform
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch (platform) {
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
            break;
    }

    // Remember the Spark config flag; force the local config on Spark
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }

    try {
        // Set up configuration, script paths and arguments
        getAndLoadTestConfiguration(testName);
        final String scriptBase = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptBase + testName + ".dml";
        programArgs = new String[] { "-explain", "-stats", "-args", input(INPUT_NAME), output(OUTPUT_NAME) };
        fullRScriptName = scriptBase + testName + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // Density of the generated input
        double density;
        switch (sparsity) {
            case SPARSE:
                density = sparsitySparse;
                break;
            case EMPTY:
                density = 0;
                break;
            default:
                density = sparsityDense;
                break;
        }

        // Shape of the generated input: start from a full matrix and
        // collapse one dimension for the vector cases
        int numRows = rows;
        int numCols = cols;
        switch (dataType) {
            case ROWVECTOR:
                numRows = 1;
                break;
            case COLUMNVECTOR:
                numCols = 1;
                break;
            default:
                break;
        }

        // Generate and persist the input
        double[][] data = getRandomMatrix(numRows, numCols, -1, 1, density, 7);
        writeInputMatrixWithMTD(INPUT_NAME, data, true);

        // Execute both implementations
        runTest(true, false, null, -1);
        runRScript(true);

        // Compare their outputs
        HashMap<CellIndex, Double> actual = readDMLMatrixFromHDFS(OUTPUT_NAME);
        HashMap<CellIndex, Double> expected = readRMatrixFromFS(OUTPUT_NAME);
        TestUtils.compareMatrices(actual, expected, eps, "Stat-DML", "Stat-R");

        // Check the recorded opcodes to see whether the rewrite was applied
        final boolean statsChecked = platform == ExecType.SPARK || platform == ExecType.CP;
        if (rewrites && statsChecked) {
            String opPrefix = "";
            if (platform == ExecType.SPARK) {
                opPrefix = Instruction.SP_INST_PREFIX;
            }
            if (dataType == DataType.ROWVECTOR) {
                // row vector: the full-variance opcode is expected to show up
                boolean sawVarOp = Statistics.getCPHeavyHitterOpCodes().contains(opPrefix + varOp);
                Assert.assertTrue("Rewrite not applied to row vector case.", sawVarOp);
            } else if (dataType == DataType.COLUMNVECTOR) {
                // column vector: the row-variance opcode is expected to be absent
                boolean sawRowVarOp = Statistics.getCPHeavyHitterOpCodes().contains(opPrefix + rowVarOp);
                Assert.assertTrue("Rewrite not applied to column vector case.", !sawRowVarOp);
            }
        }
    } finally {
        // Put the global settings back
        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = savedRewrites;
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.
the class StdDevTest method testStdDev.
/**
 * Runs the standard deviation function, "sd(X)", through the DML script and
 * the R reference script named by {@code testName} and compares the outputs.
 *
 * @param testName The name of this test case (also the script base name).
 * @param sparsity Selection between empty, sparse, and dense data.
 * @param dataType Selection between a matrix, a row vector, and a
 *                 column vector.
 * @param platform Selection between CP/Spark/MR platforms.
 */
private void testStdDev(String testName, Sparsity sparsity, DataType dataType, ExecType platform) {
    // Remember and override the runtime platform
    final RUNTIME_PLATFORM previousPlatform = rtplatform;
    switch (platform) {
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
            break;
    }

    // Remember the Spark config flag; force the local config on Spark
    final boolean previousSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }

    try {
        // Set up configuration, script paths and arguments
        getAndLoadTestConfiguration(testName);
        final String scriptRoot = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptRoot + testName + ".dml";
        programArgs = new String[] { "-explain", "-stats", "-args", input(INPUT_NAME), output(OUTPUT_NAME) };
        fullRScriptName = scriptRoot + testName + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // Density of the generated input
        double fillRatio;
        switch (sparsity) {
            case SPARSE:
                fillRatio = sparsitySparse;
                break;
            case EMPTY:
                fillRatio = 0;
                break;
            default:
                fillRatio = sparsityDense;
                break;
        }

        // Shape of the generated input: a full matrix unless a vector is requested
        int height = rows;
        int width = cols;
        switch (dataType) {
            case COLUMNVECTOR:
                width = 1;
                break;
            case ROWVECTOR:
                height = 1;
                break;
            default:
                break;
        }

        // Generate and persist the input
        double[][] input = getRandomMatrix(height, width, -1, 1, fillRatio, 7);
        writeInputMatrixWithMTD(INPUT_NAME, input, true);

        // Execute both implementations
        runTest(true, false, null, -1);
        runRScript(true);

        // Compare their outputs
        HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromHDFS(OUTPUT_NAME);
        HashMap<CellIndex, Double> rResult = readRMatrixFromFS(OUTPUT_NAME);
        TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R");
    } finally {
        // Put the global settings back
        rtplatform = previousPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = previousSparkConfig;
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex in project incubator-systemml by apache.
the class AppendMatrixTest method commonAppendTest.
/**
 * Shared driver for the append tests: generates two input matrices with the
 * same number of rows, runs the DML append script and the R reference script,
 * checks the number of executed MR jobs and compares every configured output
 * file.
 *
 * @param platform           runtime platform to execute on
 * @param rows               number of rows of both inputs
 * @param cols1              number of columns of the first input (A)
 * @param cols2              number of columns of the second input (B)
 * @param sparse             if true, A is generated with {@code sparsity2},
 *                           otherwise with {@code sparsity1}
 * @param forcedAppendMethod append method to force via
 *                           {@code BinaryOp.FORCED_APPEND_METHOD}; null
 *                           leaves the current setting untouched for the run
 */
public void commonAppendTest(RUNTIME_PLATFORM platform, int rows, int cols1, int cols2, boolean sparse, AppendMethod forcedAppendMethod) {
    TestConfiguration testConfig = getAndLoadTestConfiguration(TEST_NAME);
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    double density = sparsity1;
    if (sparse) {
        density = sparsity2;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;

    try {
        if (forcedAppendMethod != null) {
            BinaryOp.FORCED_APPEND_METHOD = forcedAppendMethod;
        }

        rtplatform = platform;
        if (rtplatform == RUNTIME_PLATFORM.SPARK) {
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        }

        testConfig.addVariable("rows", rows);
        testConfig.addVariable("cols", cols1);

        /* This is for running the junit test the new way, i.e., construct the arguments directly */
        final String scriptHome = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptHome + TEST_NAME + ".dml";
        programArgs = new String[] {
            "-args",
            input("A"), Long.toString(rows), Long.toString(cols1),
            input("B"), Long.toString(cols2),
            output("C")
        };
        fullRScriptName = scriptHome + TEST_NAME + ".R";
        rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

        // first input uses the density chosen by the sparse flag
        Random densityGenerator = new Random(System.currentTimeMillis());
        double[][] left = getRandomMatrix(rows, cols1, min, max, density, System.currentTimeMillis());
        writeInputMatrix("A", left, true);

        // second input uses a randomly drawn density
        density = densityGenerator.nextDouble();
        double[][] right = getRandomMatrix(rows, cols2, min, max, density, System.currentTimeMillis());
        writeInputMatrix("B", right, true);

        // job-count expectations depend on whether we run on Hadoop
        int compiledJobs;
        int executedJobs;
        if (rtplatform == RUNTIME_PLATFORM.HADOOP) {
            compiledJobs = 2;
            executedJobs = 2;
        } else {
            compiledJobs = 1;
            executedJobs = 0;
        }

        runTest(true, false, null, compiledJobs);
        runRScript(true);
        Assert.assertEquals("Wrong number of executed MR jobs.", executedJobs, Statistics.getNoOfExecutedMRJobs());

        // compare each configured output between DML and R
        for (String outName : testConfig.getOutputFiles()) {
            HashMap<CellIndex, Double> fromDml = readDMLMatrixFromHDFS(outName);
            HashMap<CellIndex, Double> fromR = readRMatrixFromFS(outName);
            TestUtils.compareMatrices(fromDml, fromR, epsilon, outName + "-DML", outName + "-R");
        }
    } finally {
        // reset execution platform and the other global switches
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
        BinaryOp.FORCED_APPEND_METHOD = null;
    }
}
Aggregations