Use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml (Apache):
class WriterTextCSV, method writeCSVMatrixToFile.
/**
 * Writes rows [rl, ru) of the given matrix block as CSV to a single file.
 * Output is produced chunk-wise in groups of BLOCKSIZE_J columns to prevent
 * OOM on very wide matrices. The header line (C1,C2,...) is written only by
 * the partition that owns the first row (rl == 0).
 *
 * @param path  target file path (an existing file is overwritten)
 * @param job   hadoop job configuration (unused here; kept for interface consistency)
 * @param fs    file system handle used to create the output stream
 * @param src   source matrix block, dense or sparse
 * @param rl    row lower bound, inclusive
 * @param ru    row upper bound, exclusive
 * @param props CSV format properties (delimiter, header, sparse output); may be null,
 *              in which case defaults are used
 * @throws IOException if the output file cannot be created or written
 */
protected static void writeCSVMatrixToFile(Path path, JobConf job, FileSystem fs, MatrixBlock src, int rl, int ru, CSVFileFormatProperties props) throws IOException {
    boolean sparse = src.isInSparseFormat();
    int clen = src.getNumColumns();
    // create buffered writer (overwrite = true)
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
    try {
        // for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();
        props = (props == null) ? new CSVFileFormatProperties() : props;
        String delim = props.getDelim();
        boolean csvsparse = props.isSparse();
        // Write header line, if needed (first partition only)
        if (props.hasHeader() && rl == 0) {
            // write row chunk-wise to prevent OOM on large number of columns
            for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                    // append char + int directly, avoiding an intermediate String concat
                    sb.append('C').append(j + 1);
                    if (j < clen - 1)
                        sb.append(delim);
                }
                br.write(sb.toString());
                sb.setLength(0);
            }
            br.write('\n');
        }
        // Write data lines
        if (sparse) { // SPARSE
            SparseBlock sblock = src.getSparseBlock();
            for (int i = rl; i < ru; i++) {
                // write row chunk-wise to prevent OOM on large number of columns
                int prev_jix = -1;
                if (sblock != null && i < sblock.numRows() && !sblock.isEmpty(i)) {
                    int pos = sblock.pos(i);
                    int alen = sblock.size(i);
                    int[] aix = sblock.indexes(i);
                    double[] avals = sblock.values(i);
                    for (int j = pos; j < pos + alen; j++) {
                        int jix = aix[j];
                        // output empty fields between previous and current non-zero
                        for (int j2 = prev_jix; j2 < jix - 1; j2++) {
                            if (!csvsparse)
                                sb.append('0');
                            sb.append(delim);
                            // flush buffered string chunk-wise
                            if (j2 % BLOCKSIZE_J == 0) {
                                br.write(sb.toString());
                                sb.setLength(0);
                            }
                        }
                        // output the value (non-zero) and flush; note: this
                        // unconditional flush subsumes the former conditional
                        // (jix % BLOCKSIZE_J == 0) flush, which was dead code
                        // because the buffer was already empty at that point
                        sb.append(avals[j]);
                        if (jix < clen - 1)
                            sb.append(delim);
                        br.write(sb.toString());
                        sb.setLength(0);
                        prev_jix = jix;
                    }
                }
                // output remaining empty fields after the last non-zero
                // (for a fully empty row this emits all clen fields)
                for (int bj = prev_jix + 1; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        if (!csvsparse)
                            sb.append('0');
                        if (j < clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                br.write('\n');
            }
        } else { // DENSE
            for (int i = rl; i < ru; i++) {
                // write row chunk-wise to prevent OOM on large number of columns
                for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        double lvalue = src.getValueDenseUnsafe(i, j);
                        if (lvalue != 0) // for nnz
                            sb.append(lvalue);
                        else if (!csvsparse)
                            sb.append('0');
                        if (j != clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                br.write('\n');
            }
        }
    } finally {
        // always close the stream, even on error
        IOUtilFunctions.closeSilently(br);
    }
}
Use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml (Apache):
class FrameReadWriteTest, method runFrameReadWriteTest.
/**
* @param sparseM1
* @param sparseM2
* @param instType
*/
/**
 * Drives a frame read/write round-trip: generates two random frames from the
 * given schemas, writes them in the requested output format, and verifies the
 * result (delegated to writeAndVerifyData). Parallel read/write compiler
 * flags are toggled for the duration of the test and restored afterwards.
 *
 * @param oinfo    output format descriptor
 * @param schema1  value-type schema of the first frame
 * @param schema2  value-type schema of the second frame
 * @param parallel whether parallel text/binary read-write is enabled
 */
private void runFrameReadWriteTest(OutputInfo oinfo, ValueType[] schema1, ValueType[] schema2, boolean parallel) {
    // remember compiler flags so they can be restored in the finally block
    boolean oldParText = CompilerConfig.FLAG_PARREADWRITE_TEXT;
    boolean oldParBin = CompilerConfig.FLAG_PARREADWRITE_BINARY;
    try {
        // apply the requested parallelism and refresh the global config
        CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
        CompilerConfig.FLAG_PARREADWRITE_BINARY = parallel;
        ConfigurationManager.setGlobalConfig(new CompilerConfig());
        // generate random input data for both frames
        double[][] dataA = getRandomMatrix(rows, schema1.length, -10, 10, 0.9, 2373);
        double[][] dataB = getRandomMatrix(rows, schema2.length, -10, 10, 0.9, 129);
        // populate frame blocks from the generated data
        FrameBlock frameA = new FrameBlock(schema1);
        initFrameData(frameA, dataA, schema1);
        FrameBlock frameB = new FrameBlock(schema2);
        initFrameData(frameB, dataB, schema2);
        // configure CSV properties, then write the frames and verify
        CSVFileFormatProperties fprop = new CSVFileFormatProperties();
        fprop.setDelim(DELIMITER);
        fprop.setHeader(HEADER);
        writeAndVerifyData(oinfo, frameA, frameB, fprop);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        // restore the original compiler configuration
        CompilerConfig.FLAG_PARREADWRITE_TEXT = oldParText;
        CompilerConfig.FLAG_PARREADWRITE_BINARY = oldParBin;
        ConfigurationManager.setGlobalConfig(new CompilerConfig());
    }
}
Use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml (Apache):
class FrameCSVReadWriteTest, method runCSVQuotesReadWriteTest.
/**
* @param rt
* @param ofmt
* @param dataset
*/
/**
 * Runs the CSV quotes read/write round-trip test on the given runtime
 * platform: executes the DML script, then reads both the original dataset
 * and the script output as frames and compares them cell by cell.
 *
 * @param rt   runtime platform to execute on
 * @param ofmt output format; only "csv" is supported
 */
private void runCSVQuotesReadWriteTest(RUNTIME_PLATFORM rt, String ofmt) {
    // switch the runtime platform, remembering the old settings for restore
    RUNTIME_PLATFORM oldPlatform = rtplatform;
    rtplatform = rt;
    boolean oldSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    if (!ofmt.equals("csv"))
        throw new RuntimeException("Unsupported test output format");
    try {
        // execute the DML script against the input dataset
        getAndLoadTestConfiguration(TEST_NAME1);
        String home = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = home + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", home + "input/" + DATASET, output("R") };
        runTest(true, false, null, -1);
        // read the original dataset (no header, comma delim) and the output
        FrameReader inReader = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, ",", false));
        FrameBlock inFrame = inReader.readFrameFromHDFS(home + "input/" + DATASET, -1L, -1L);
        FrameReader outReader = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
        FrameBlock outFrame = outReader.readFrameFromHDFS(output("R"), -1L, -1L);
        // compare cell-by-cell as strings
        String[][] expected = DataConverter.convertToStringFrame(inFrame);
        String[][] actual = DataConverter.convertToStringFrame(outFrame);
        TestUtils.compareFrames(expected, actual, expected.length, expected[0].length);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // restore runtime platform and spark configuration
        rtplatform = oldPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldSparkConfig;
    }
}
Use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml (Apache):
class TransformCSVFrameEncodeDecodeTest, method runTransformTest.
/**
* @param rt
* @param ofmt
* @param dataset
*/
/**
 * Runs the CSV frame encode/decode transform test on the given runtime
 * platform: executes the DML script, reads input dataset and script output
 * as frames, and compares them cell by cell. On HYBRID_SPARK it additionally
 * checks the number of executed Spark instructions.
 *
 * @param rt   runtime platform to execute on
 * @param ofmt output format; only "csv" is supported
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt) {
    // set runtime platform, remembering the old settings for restore
    RUNTIME_PLATFORM rtold = rtplatform;
    rtplatform = rt;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    if (!ofmt.equals("csv"))
        throw new RuntimeException("Unsupported test output format");
    try {
        // execute the DML script against the input dataset
        getAndLoadTestConfiguration(TEST_NAME1);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", HOME + "input/" + DATASET, output("R") };
        runTest(true, false, null, -1);
        // read input/output and compare
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, ",", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
        if (rt == RUNTIME_PLATFORM.HYBRID_SPARK) {
            // use the primitive assertEquals overload instead of the
            // deprecated new Long(...) boxing constructors
            Assert.assertEquals("Wrong number of executed Spark instructions: " + Statistics.getNoOfExecutedSPInst(), 2, Statistics.getNoOfExecutedSPInst());
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // restore runtime platform and spark configuration
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
Use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml (Apache):
class TransformCSVFrameEncodeReadTest, method runTransformTest.
/**
* @param rt
* @param ofmt
* @param dataset
*/
/**
 * Runs the CSV frame encode/read transform test on the given runtime
 * platform: executes the DML script (over a full or subset row range),
 * reads the script output back with a sequential or parallel CSV frame
 * reader, and prints the resulting frame.
 *
 * @param rt      runtime platform to execute on
 * @param ofmt    output format; only "csv" is supported
 * @param subset  whether to process only a subset of rows (4 instead of 13)
 * @param parRead whether to read the result with the parallel CSV reader
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt, boolean subset, boolean parRead) {
    // switch the runtime platform, remembering the old settings for restore
    RUNTIME_PLATFORM oldPlatform = rtplatform;
    rtplatform = rt;
    boolean oldSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    if (!ofmt.equals("csv"))
        throw new RuntimeException("Unsupported test output format");
    try {
        // execute the DML script over the requested number of rows
        getAndLoadTestConfiguration(TEST_NAME1);
        String home = SCRIPT_DIR + TEST_DIR;
        int nrows = subset ? 4 : 13;
        fullDMLScriptName = home + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-stats", "-args", home + "input/" + DATASET, String.valueOf(nrows), output("R") };
        runTest(true, false, null, -1);
        // read the output frame with the requested reader variant and print it
        FrameReader outReader;
        if (parRead)
            outReader = new FrameReaderTextCSVParallel(new CSVFileFormatProperties());
        else
            outReader = new FrameReaderTextCSV(new CSVFileFormatProperties());
        FrameBlock outFrame = outReader.readFrameFromHDFS(output("R"), -1L, -1L);
        System.out.println(DataConverter.toString(outFrame));
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // restore runtime platform and spark configuration
        rtplatform = oldPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldSparkConfig;
    }
}
Aggregations