Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by Apache —
class TransformApplyEmptyRecodeMapTest, method testTransformApplyEmptyRecodeMap.
@Test
public void testTransformApplyEmptyRecodeMap() {
	try {
		// input: a frame of constant values derived from a random matrix
		MatrixBlock tmp = DataConverter.convertToMatrixBlock(getRandomMatrix(rows, cols, 1, 1, 1, 7));
		FrameBlock input = DataConverter.convertToFrameBlock(tmp);
		// recode meta data frame with an empty map for column C1
		FrameBlock recodeMeta = new FrameBlock(new ValueType[] { ValueType.STRING }, new String[] { "C1" });
		// apply the recode transform using the empty recode map
		Encoder enc = EncoderFactory.createEncoder("{ids:true, recode:[1]}", input.getColumnNames(), recodeMeta.getSchema(), recodeMeta);
		MatrixBlock result = enc.apply(input, new MatrixBlock(rows, cols, true));
		// validate dimensions; every value must be NaN because no token
		// was found in the (empty) recode map
		Assert.assertEquals(rows, result.getNumRows());
		Assert.assertEquals(cols, result.getNumColumns());
		for (int r = 0; r < rows; r++) {
			for (int c = 0; c < cols; c++) {
				Assert.assertTrue(Double.isNaN(result.quickGetValue(r, c)));
			}
		}
	} catch (DMLRuntimeException e) {
		throw new RuntimeException(e);
	}
}
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by Apache —
class TransformCSVFrameEncodeDecodeTest, method runTransformTest.
/**
 * Runs the CSV frame encode/decode test script and compares the
 * decoded output frame against the original input dataset.
 *
 * @param rt   runtime platform to execute on (CP, Spark, or hybrid)
 * @param ofmt output format; only "csv" is supported
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt) {
	// set runtime platform (restored in finally)
	RUNTIME_PLATFORM rtold = rtplatform;
	rtplatform = rt;
	boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
	if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
		DMLScript.USE_LOCAL_SPARK_CONFIG = true;
	if (!ofmt.equals("csv"))
		throw new RuntimeException("Unsupported test output format");
	try {
		getAndLoadTestConfiguration(TEST_NAME1);
		String HOME = SCRIPT_DIR + TEST_DIR;
		fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
		programArgs = new String[] { "-explain", "-args", HOME + "input/" + DATASET, output("R") };
		runTest(true, false, null, -1);
		// read input and output frames and compare them as string frames
		FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, ",", false));
		FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
		FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
		FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
		String[][] R1 = DataConverter.convertToStringFrame(fb1);
		String[][] R2 = DataConverter.convertToStringFrame(fb2);
		TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
		if (rt == RUNTIME_PLATFORM.HYBRID_SPARK) {
			// Long.valueOf instead of the deprecated new Long(..) constructor
			Assert.assertEquals("Wrong number of executed Spark instructions: " + Statistics.getNoOfExecutedSPInst(), Long.valueOf(2), Long.valueOf(Statistics.getNoOfExecutedSPInst()));
		}
	} catch (Exception ex) {
		throw new RuntimeException(ex);
	} finally {
		// restore global runtime configuration
		rtplatform = rtold;
		DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
	}
}
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by Apache —
class TransformCSVFrameEncodeReadTest, method runTransformTest.
/**
 * Runs the CSV frame encode test script and prints the resulting
 * frame for inspection (note: no automated output comparison here,
 * only a successful read and print of the result).
 *
 * @param rt      runtime platform to execute on (CP, Spark, or hybrid)
 * @param ofmt    output format; only "csv" is supported
 * @param subset  if true, pass 4 rows to the script instead of 13
 * @param parRead if true, read the result with the parallel CSV frame reader
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt, boolean subset, boolean parRead) {
// set runtime platform (restored in finally)
RUNTIME_PLATFORM rtold = rtplatform;
rtplatform = rt;
boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
if (!ofmt.equals("csv"))
throw new RuntimeException("Unsupported test output format");
try {
getAndLoadTestConfiguration(TEST_NAME1);
String HOME = SCRIPT_DIR + TEST_DIR;
// row count handed to the DML script as an argument
int nrows = subset ? 4 : 13;
fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
programArgs = new String[] { "-explain", "-stats", "-args", HOME + "input/" + DATASET, String.valueOf(nrows), output("R") };
runTest(true, false, null, -1);
// read the output frame back (serial vs parallel reader per parRead)
FrameReader reader2 = parRead ? new FrameReaderTextCSVParallel(new CSVFileFormatProperties()) : new FrameReaderTextCSV(new CSVFileFormatProperties());
FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
System.out.println(DataConverter.toString(fb2));
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
// restore global runtime configuration
rtplatform = rtold;
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
}
}
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by Apache —
class TransformEncodeDecodeTest, method runTransformEncodeDecodeTest.
/**
 * Runs an encode/decode round-trip test: writes a generated frame,
 * executes the DML script, then checks that the output aggregates
 * match the value counts of the original frame.
 *
 * NOTE(review): the {@code et} parameter is not read in this body —
 * the platform is forced to HYBRID (only CP supported); confirm the
 * parameter is kept for signature symmetry with sibling tests.
 *
 * @param et     execution type (currently unused, see note above)
 * @param sparse if true, generate input with sparsity2 instead of sparsity1
 * @param fmt    external format string resolved to input/output infos
 */
private void runTransformEncodeDecodeTest(ExecType et, boolean sparse, String fmt) {
RUNTIME_PLATFORM platformOld = rtplatform;
// only CP supported
rtplatform = RUNTIME_PLATFORM.HYBRID;
try {
getAndLoadTestConfiguration(TEST_NAME1);
// get input/output info
InputInfo iinfo = InputInfo.stringExternalToInputInfo(fmt);
OutputInfo oinfo = InputInfo.getMatchingOutputInfo(iinfo);
// generate and write input data (rounded values in [1,15])
double[][] A = TestUtils.round(getRandomMatrix(rows, cols, 1, 15, sparse ? sparsity2 : sparsity1, 7));
FrameBlock FA = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(A));
FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
writer.writeFrameToHDFS(FA, input("F"), rows, cols);
fullDMLScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME1 + ".dml";
programArgs = new String[] { "-explain", "-args", input("F"), fmt, String.valueOf(rows), String.valueOf(cols), SCRIPT_DIR + TEST_DIR + SPEC, output("FO") };
// run test
runTest(true, false, null, -1);
// compare matrices (values recoded to identical codes)
FrameReader reader = FrameReaderFactory.createFrameReader(iinfo);
FrameBlock FO = reader.readFrameFromHDFS(output("FO"), 16, 2);
// per-value counts of the first input column, keyed by string token
HashMap<String, Long> cFA = getCounts(FA, 1);
Iterator<String[]> iterFO = FO.getStringRowIterator();
while (iterFO.hasNext()) {
String[] row = iterFO.next();
// NOTE(review): unboxing throws NPE if row[1] is not a key in cFA —
// presumably intentional as a hard test failure; confirm.
Double expected = (double) cFA.get(row[1]);
// null cells in the output frame are treated as aggregate 0
Double val = (row[0] != null) ? Double.valueOf(row[0]) : 0;
Assert.assertEquals("Output aggregates don't match: " + expected + " vs " + val, expected, val);
}
} catch (Exception ex) {
ex.printStackTrace();
Assert.fail(ex.getMessage());
} finally {
// restore global runtime configuration
rtplatform = platformOld;
}
}
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by Apache —
class TransformFrameEncodeDecodeTest, method runTransformTest.
/**
 * Runs a frame encode/decode test script for the given transform type
 * (recode or dummy coding) and compares the decoded output frame
 * against the original input dataset.
 *
 * @param rt       runtime platform to execute on (CP, Spark, or hybrid)
 * @param ofmt     output format; only "csv" is supported
 * @param type     transform type under test (RECODE or DUMMY)
 * @param colnames if true, use the column-name-based spec variant
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt, TransformType type, boolean colnames) {
	// set runtime platform (restored in finally)
	RUNTIME_PLATFORM rtold = rtplatform;
	rtplatform = rt;
	boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
	if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
		DMLScript.USE_LOCAL_SPARK_CONFIG = true;
	// select transform specification and dataset per transform type
	String SPEC = null;
	String DATASET = null;
	switch(type) {
		case RECODE:
			SPEC = colnames ? SPEC1b : SPEC1;
			DATASET = DATASET1;
			break;
		case DUMMY:
			SPEC = colnames ? SPEC2b : SPEC2;
			DATASET = DATASET1;
			break;
		default:
			throw new RuntimeException("Unsupported transform type for encode/decode test.");
	}
	if (!ofmt.equals("csv"))
		throw new RuntimeException("Unsupported test output format");
	try {
		getAndLoadTestConfiguration(TEST_NAME1);
		String HOME = SCRIPT_DIR + TEST_DIR;
		fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
		programArgs = new String[] { "-explain", "-nvargs", "DATA=" + HOME + "input/" + DATASET, "TFSPEC=" + HOME + "input/" + SPEC, "TFDATA=" + output("tfout"), "SEP=,", "OFMT=" + ofmt, "OSEP=," };
		// Originally OSEP was set to
		// OSEP=","
		// Apache Commons CLI strips away the leading and trailing quotes, leaving us with
		// OSEP=",
		// This is just a feature/bug and is reported in CLI-262,
		// though even a fix is unlikely to be backported to 1.2
		runTest(true, false, null, -1);
		// read input and output frames and compare them as string frames
		FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(true, ",", false));
		FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
		FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
		FrameBlock fb2 = reader2.readFrameFromHDFS(output("tfout"), -1L, -1L);
		String[][] R1 = DataConverter.convertToStringFrame(fb1);
		String[][] R2 = DataConverter.convertToStringFrame(fb2);
		TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
		if (rt == RUNTIME_PLATFORM.HYBRID_SPARK) {
			// Long.valueOf instead of the deprecated new Long(..) constructor
			Assert.assertEquals("Wrong number of executed Spark instructions: " + Statistics.getNoOfExecutedSPInst(), Long.valueOf(2), Long.valueOf(Statistics.getNoOfExecutedSPInst()));
		}
	} catch (Exception ex) {
		throw new RuntimeException(ex);
	} finally {
		// restore global runtime configuration
		rtplatform = rtold;
		DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
	}
}
Aggregations