use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class FrameMetaReadWriteTest method runFrameReadWriteTest.
/**
 * Writes a randomly generated string frame with per-column metadata (missing-value
 * replacement and number of distinct values), runs the DML test script on it, and
 * asserts that the metadata read back from the script output equals the metadata
 * that was written.
 *
 * @param oinfo output format used to write the input frame, passed to the script in
 *              its external string form, and used to pick the matching reader
 * @param et    execution type; MR maps to HADOOP, SPARK to SPARK, anything else to HYBRID
 */
private void runFrameReadWriteTest(OutputInfo oinfo, ExecType et) {
    // Resolve the format string before any static state is modified, so that a
    // failure here cannot leave rtplatform / spark config changed for later tests.
    String ofmt = OutputInfo.outputInfoToStringExternal(oinfo);

    // rtplatform for MR
    RUNTIME_PLATFORM platformOld = rtplatform;
    switch (et) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;

    try {
        TestConfiguration config = getTestConfiguration(TEST_NAME);
        loadTestConfiguration(config);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME + ".dml";
        programArgs = new String[] { "-explain", "-args", input("A"), String.valueOf(rows), String.valueOf(cols), ofmt, output("B") };

        // data generation and write input
        double[][] A = getRandomMatrix(rows, cols, -10, 10, 0.7, 3412);
        FrameBlock fA = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(A), ValueType.STRING);
        // give every column distinct metadata so a mix-up between columns is detectable
        for (int j = 0; j < cols; j++) {
            fA.getColumnMetadata(j).setMvValue(String.valueOf(j + 1));
            fA.getColumnMetadata(j).setNumDistinct(j + 1);
        }
        FrameWriterFactory.createFrameWriter(oinfo).writeFrameToHDFS(fA, input("A"), rows, cols);

        // run testcase
        runTest(true, false, null, -1);

        // read output and compare meta data
        FrameBlock fB = FrameReaderFactory.createFrameReader(OutputInfo.getMatchingInputInfo(oinfo)).readFrameFromHDFS(output("B"), rows, cols);
        for (int j = 0; j < cols; j++) {
            Assert.assertEquals("MV meta data wrong!", fA.getColumnMetadata(j).getMvValue(), fB.getColumnMetadata(j).getMvValue());
            Assert.assertEquals("Distinct meta data wrong!", fA.getColumnMetadata(j).getNumDistinct(), fB.getColumnMetadata(j).getNumDistinct());
        }
    } catch (Exception ex) {
        // the original exception travels as the cause, so its stack trace is not lost
        throw new RuntimeException(ex);
    } finally {
        // always restore the static configuration shared with other tests
        rtplatform = platformOld;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class FrameReadWriteTest method writeAndVerifyData.
/**
 * Writes both frames to temporary files, reads them back with the matching reader,
 * and verifies that the data read equals the original frames. The temporary files
 * are removed afterwards, also when writing, reading or verification fails.
 *
 * @param oinfo  output format; selects the writer and the matching reader
 * @param frame1 first frame to round-trip
 * @param frame2 second frame to round-trip
 * @param fprop  file format properties handed to both the writer and the reader
 * @throws IOException if writing, reading or deleting the temporary files fails
 */
void writeAndVerifyData(OutputInfo oinfo, FrameBlock frame1, FrameBlock frame2, CSVFileFormatProperties fprop) throws IOException {
    String fname1 = SCRIPT_DIR + TEST_DIR + "/frameData1";
    String fname2 = SCRIPT_DIR + TEST_DIR + "/frameData2";

    // Create reader/writer
    FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo, fprop);
    FrameReader reader = FrameReaderFactory.createFrameReader(OutputInfo.getMatchingInputInfo(oinfo), fprop);

    try {
        // Write frame data to disk
        writer.writeFrameToHDFS(frame1, fname1, frame1.getNumRows(), frame1.getNumColumns());
        writer.writeFrameToHDFS(frame2, fname2, frame2.getNumRows(), frame2.getNumColumns());

        // Read frame data from disk
        FrameBlock frame1Read = reader.readFrameFromHDFS(fname1, frame1.getSchema(), frame1.getNumRows(), frame1.getNumColumns());
        FrameBlock frame2Read = reader.readFrameFromHDFS(fname2, frame2.getSchema(), frame2.getNumRows(), frame2.getNumColumns());

        // Verify that the data read matches the original frames
        verifyFrameData(frame1, frame1Read);
        verifyFrameData(frame2, frame2Read);
    } finally {
        // Clean up even on failure; nested so the second file is still removed
        // when deleting the first one throws.
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(fname1);
        } finally {
            MapReduceTool.deleteFileIfExistOnHDFS(fname2);
        }
    }
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class TransformFrameEncodeColmapTest method runTransformTest.
/**
 * Runs the given transform DML script on DATASET1 and checks that the frame written
 * by the script equals the input dataset when both are read back as string frames.
 *
 * @param testname name of the test configuration and of the DML script (without ".dml")
 * @param rt       runtime platform to execute under; restored afterwards
 * @param ofmt     output format; only "csv" is supported
 * @param colnames if true the SPEC1b transform spec is used, otherwise SPEC1
 * @throws RuntimeException if {@code ofmt} is not "csv", or wrapping any failure of the run
 */
private void runTransformTest(String testname, RUNTIME_PLATFORM rt, String ofmt, boolean colnames) {
    // Validate before touching any static state: throwing after the platform switch
    // but outside the try/finally would leave rtplatform and the spark config
    // modified for subsequent tests. The literal-first equals is also null-safe.
    if (!"csv".equals(ofmt))
        throw new RuntimeException("Unsupported test output format");

    // set runtime platform
    RUNTIME_PLATFORM rtold = rtplatform;
    rtplatform = rt;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;

    // set transform specification
    String DATASET = DATASET1;
    String SPEC = colnames ? SPEC1b : SPEC1;

    try {
        getAndLoadTestConfiguration(testname);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + testname + ".dml";
        programArgs = new String[] { "-explain", "-nvargs", "DATA=" + HOME + "input/" + DATASET, "TFSPEC=" + HOME + "input/" + SPEC, "TFDATA=" + output("tfout"), "OFMT=" + ofmt, "OSEP=," };
        runTest(true, false, null, -1);

        // read input/output and compare
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(true, ",", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("tfout"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // always restore the static configuration shared with other tests
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class TransformFrameEncodeDecodeTokenTest method runTransformTest.
/**
 * Runs the TEST_NAME1 DML script on DATASET1 with a single space as input and output
 * separator, and checks that the frame written by the script equals the input dataset
 * when both are read back as string frames. Under HYBRID_SPARK it additionally
 * asserts that exactly two Spark instructions were executed.
 *
 * @param rt   runtime platform to execute under; restored afterwards
 * @param ofmt output format; only "csv" is supported
 * @throws RuntimeException if {@code ofmt} is not "csv", or wrapping any failure of the run
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt) {
    // Validate before touching any static state: throwing after the platform switch
    // but outside the try/finally would leave rtplatform and the spark config
    // modified for subsequent tests. The literal-first equals is also null-safe.
    if (!"csv".equals(ofmt))
        throw new RuntimeException("Unsupported test output format");

    // set runtime platform
    RUNTIME_PLATFORM rtold = rtplatform;
    rtplatform = rt;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;

    try {
        getAndLoadTestConfiguration(TEST_NAME1);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-nvargs", "DATA=" + HOME + "input/" + DATASET1, "TFSPEC=" + HOME + "input/" + SPEC1, "TFDATA=" + output("tfout"), "SEP= ", "OFMT=" + ofmt, "OSEP= " };
        runTest(true, false, null, -1);

        // read input/output and compare; both files are space-separated
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, " ", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET1, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, " ", false));
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("tfout"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);

        if (rt == RUNTIME_PLATFORM.HYBRID_SPARK) {
            // Long.valueOf replaces the deprecated Long(..) constructors
            Assert.assertEquals("Wrong number of executed Spark instructions: " + Statistics.getNoOfExecutedSPInst(), Long.valueOf(2), Long.valueOf(Statistics.getNoOfExecutedSPInst()));
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // always restore the static configuration shared with other tests
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class FrameObject method readBlobFromHDFS.
/**
 * Loads the frame stored under the given file name.
 *
 * The row and column counts handed to the reader come from the matrix
 * characteristics of this object's metadata; {@code clen} is only used to size a
 * default all-STRING schema when no schema is set.
 *
 * @param fname file name to read from
 * @param rlen  not used by this implementation
 * @param clen  column count used for the fallback schema (values below 1 yield one column)
 * @return the frame block that was read, never null
 * @throws IOException if the reader fails with a DMLRuntimeException or returns no data
 */
@Override
protected FrameBlock readBlobFromHDFS(String fname, long rlen, long clen) throws IOException {
    MetaDataFormat format = (MetaDataFormat) _metaData;
    MatrixCharacteristics dims = format.getMatrixCharacteristics();

    // fall back to an all-STRING schema when none has been set
    ValueType[] schema;
    if (_schema == null) {
        int ncol = (clen >= 1) ? (int) clen : 1;
        schema = UtilFunctions.nCopies(ncol, ValueType.STRING);
    } else {
        schema = _schema;
    }

    // read the frame block
    FrameBlock result;
    try {
        result = FrameReaderFactory
            .createFrameReader(format.getInputInfo(), getFileFormatProperties())
            .readFrameFromHDFS(fname, schema, dims.getRows(), dims.getCols());
    } catch (DMLRuntimeException ex) {
        throw new IOException(ex);
    }

    // sanity check: a reader that yields nothing is reported as an I/O failure
    if (result != null)
        return result;
    throw new IOException("Unable to load frame from file: " + fname);
}
Aggregations