use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class MatrixReaderFactory method createMatrixReader.
/**
 * Creates the matrix reader that corresponds to the given input format.
 *
 * <p>For text cell / matrix market, CSV, and binary block inputs a parallel
 * reader is chosen when the respective compiler config flag is set and the
 * default sparse block type is MCSR; otherwise the sequential reader is used.
 * Binary cell input always uses {@code ReaderBinaryCell}. CSV readers are
 * created with default {@code CSVFileFormatProperties}.
 *
 * @param iinfo input format descriptor
 * @return matrix reader for the given input format
 * @throws DMLRuntimeException if the input info matches none of the handled formats
 */
public static MatrixReader createMatrixReader(InputInfo iinfo) throws DMLRuntimeException {
    //text cell and matrix market share the same readers
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        return useParallel ? new ReaderTextCellParallel(iinfo) : new ReaderTextCell(iinfo);
    }

    //csv readers with default format properties
    if (iinfo == InputInfo.CSVInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        return useParallel
            ? new ReaderTextCSVParallel(new CSVFileFormatProperties())
            : new ReaderTextCSV(new CSVFileFormatProperties());
    }

    //binary cell has a single (sequential) reader
    if (iinfo == InputInfo.BinaryCellInputInfo) {
        return new ReaderBinaryCell();
    }

    //binary block readers
    if (iinfo == InputInfo.BinaryBlockInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        return useParallel ? new ReaderBinaryBlockParallel(false) : new ReaderBinaryBlock(false);
    }

    //no reader available for any other input info
    throw new DMLRuntimeException("Failed to create matrix reader for unknown input info: " + InputInfo.inputInfoToString(iinfo));
}
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class AutomatedTestBase method readRFrameFromHDFS.
/**
 * Reads a frame from the file {@code baseDirectory + EXPECTED_DIR + fileName}.
 *
 * <p>The frame reader is obtained from {@code FrameReaderFactory} for the given
 * input info, together with CSV format properties that have the header flag set.
 *
 * @param fileName file name, appended to the base and expected directories
 * @param iinfo input format descriptor used to select the frame reader
 * @param md matrix characteristics providing the number of rows and columns to read
 * @return the frame block returned by the reader
 * @throws DMLRuntimeException declared by the signature; presumably raised by reader creation or reading
 * @throws IOException declared by the signature; presumably raised by the underlying file access
 */
protected static FrameBlock readRFrameFromHDFS(String fileName, InputInfo iinfo, MatrixCharacteristics md) throws DMLRuntimeException, IOException {
    //csv properties with header flag enabled
    final CSVFileFormatProperties csvProps = new CSVFileFormatProperties();
    csvProps.setHeader(true);

    //obtain the format-specific frame reader
    final FrameReader frameReader = FrameReaderFactory.createFrameReader(iinfo, csvProps);

    //resolve file location and read frame data
    final String framePath = baseDirectory + EXPECTED_DIR + fileName;
    return frameReader.readFrameFromHDFS(framePath, md.getRows(), md.getCols());
}
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class WriterTextCSV method writeCSVMatrixToFile.
/**
 * Writes the rows {@code [rl, ru)} of a matrix block as CSV text to the given path.
 *
 * <p>If the properties request a header and {@code rl == 0}, a header line with the
 * column labels {@code C1, C2, ...} is written first. Zero cells are written as
 * {@code 0}, or as empty fields if {@code props.isSparse()} is set. Rows are emitted
 * in chunks of {@code BLOCKSIZE_J} columns so that the reused string builder stays
 * small even for very wide matrices.
 *
 * @param path  target file, created via {@code fs.create(path, true)}
 * @param job   job configuration (not referenced by this method)
 * @param fs    file system used to create the output stream
 * @param src   matrix block to write, in sparse or dense format
 * @param rl    first row to write (inclusive)
 * @param ru    last row to write (exclusive)
 * @param props CSV format properties; default properties are used if {@code null}
 * @throws IOException if creating the file or writing to it fails
 */
protected final void writeCSVMatrixToFile(Path path, JobConf job, FileSystem fs, MatrixBlock src, int rl, int ru, CSVFileFormatProperties props) throws IOException {
    boolean sparse = src.isInSparseFormat();
    int clen = src.getNumColumns();

    //create buffered writer
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
    try {
        //for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        props = (props == null) ? new CSVFileFormatProperties() : props;
        String delim = props.getDelim();
        boolean csvsparse = props.isSparse();

        // Write header line, if needed (only by the writer of the first row)
        if (props.hasHeader() && rl == 0) {
            //write row chunk-wise to prevent OOM on large number of columns
            for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                    //chained appends avoid a temporary string per column label
                    sb.append('C').append(j + 1);
                    if (j < clen - 1)
                        sb.append(delim);
                }
                br.write(sb.toString());
                sb.setLength(0);
            }
            br.write('\n');
        }

        // Write data lines
        if (sparse) { //SPARSE
            SparseBlock sblock = src.getSparseBlock();
            for (int i = rl; i < ru; i++) {
                //column index of the last non-zero written in this row (-1: none yet)
                int prev_jix = -1;
                if (sblock != null && i < sblock.numRows() && !sblock.isEmpty(i)) {
                    int pos = sblock.pos(i);
                    int alen = sblock.size(i);
                    int[] aix = sblock.indexes(i);
                    double[] avals = sblock.values(i);
                    for (int j = pos; j < pos + alen; j++) {
                        int jix = aix[j];

                        // output empty fields between the previous and current non-zero
                        for (int j2 = prev_jix; j2 < jix - 1; j2++) {
                            if (!csvsparse)
                                sb.append('0');
                            sb.append(delim);
                            //flush buffered string at chunk boundaries
                            if (j2 % BLOCKSIZE_J == 0) {
                                br.write(sb.toString());
                                sb.setLength(0);
                            }
                        }

                        // output the value (non-zero) and flush; the buffer is always
                        // empty afterwards, so no additional chunk-boundary flush is needed
                        sb.append(avals[j]);
                        if (jix < clen - 1)
                            sb.append(delim);
                        br.write(sb.toString());
                        sb.setLength(0);

                        prev_jix = jix;
                    }
                }

                // output remaining fields after the last non-zero
                // (the entire row in case of an empty row)
                for (int bj = prev_jix + 1; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        if (!csvsparse)
                            sb.append('0');
                        if (j < clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                br.write('\n');
            }
        } else { //DENSE
            for (int i = rl; i < ru; i++) {
                //write row chunk-wise to prevent OOM on large number of columns
                for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        double lvalue = src.getValueDenseUnsafe(i, j);
                        if (lvalue != 0) //for nnz
                            sb.append(lvalue);
                        else if (!csvsparse)
                            sb.append('0');
                        if (j != clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                br.write('\n');
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(br);
    }
}
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class MapReduceTool method metaDataToString.
/**
 * Builds the JSON meta data string that describes an output object.
 *
 * <p>The entries are added in a fixed order: data type, value type (or the
 * delimiter-terminated schema if one is given), dimensions for non-scalars,
 * block sizes (binary block only) and number of non-zeros for matrices, the
 * external format name, CSV header/delimiter settings for CSV output, an
 * optional description, the author (system property {@code user.name}, or
 * {@code "SystemML"} if that is empty), and the creation timestamp.
 *
 * <p>Data type and value type names are lower-cased with {@code Locale.ROOT}
 * so that the written keywords do not depend on the JVM default locale
 * (e.g., the Turkish dotless i).
 *
 * @param vt value type, used if {@code schema} is {@code null}
 * @param schema per-column value types, or {@code null}; unknown types are written as {@code *}
 * @param dt data type of the described object
 * @param mc matrix characteristics providing dimensions, block sizes, and non-zeros
 * @param outinfo output format descriptor
 * @param formatProperties format properties, may be {@code null}; for CSV output
 *        they must be {@code CSVFileFormatProperties} (defaults are used if {@code null})
 * @return JSON string indented with 4 spaces
 * @throws JSONException if building or serializing the JSON object fails
 * @throws DMLRuntimeException declared by the signature; presumably raised by the output info conversion
 */
public static String metaDataToString(ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException {
    // maintain order in output file
    OrderedJSONObject mtd = new OrderedJSONObject();

    //handle data type and value types (incl schema for frames)
    mtd.put(DataExpression.DATATYPEPARAM, dt.toString().toLowerCase(java.util.Locale.ROOT));
    if (schema == null) {
        mtd.put(DataExpression.VALUETYPEPARAM, vt.toString().toLowerCase(java.util.Locale.ROOT));
    } else {
        //method-local buffer, no synchronization required
        StringBuilder schemaSB = new StringBuilder();
        for (int i = 0; i < schema.length; i++) {
            if (schema[i] == ValueType.UNKNOWN)
                schemaSB.append("*");
            else
                schemaSB.append(schema[i].toString());
            //note: the delimiter is appended after every entry, including the last
            schemaSB.append(DataExpression.DEFAULT_DELIM_DELIMITER);
        }
        mtd.put(DataExpression.SCHEMAPARAM, schemaSB.toString());
    }

    //handle output dimensions
    if (!dt.isScalar()) {
        mtd.put(DataExpression.READROWPARAM, mc.getRows());
        mtd.put(DataExpression.READCOLPARAM, mc.getCols());
        // handle output nnz and binary block configuration
        if (dt.isMatrix()) {
            if (outinfo == OutputInfo.BinaryBlockOutputInfo) {
                mtd.put(DataExpression.ROWBLOCKCOUNTPARAM, mc.getRowsPerBlock());
                mtd.put(DataExpression.COLUMNBLOCKCOUNTPARAM, mc.getColsPerBlock());
            }
            mtd.put(DataExpression.READNUMNONZEROPARAM, mc.getNonZeros());
        }
    }

    //handle format type and additional arguments
    mtd.put(DataExpression.FORMAT_TYPE, OutputInfo.outputInfoToStringExternal(outinfo));
    if (outinfo == OutputInfo.CSVOutputInfo) {
        CSVFileFormatProperties csvProperties = (formatProperties == null) ? new CSVFileFormatProperties() : (CSVFileFormatProperties) formatProperties;
        mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, csvProperties.hasHeader());
        mtd.put(DataExpression.DELIM_DELIMITER, csvProperties.getDelim());
    }

    //optional free-text description
    if (formatProperties != null) {
        String description = formatProperties.getDescription();
        if (StringUtils.isNotEmpty(description)) {
            String jsonDescription = StringEscapeUtils.escapeJson(description);
            mtd.put(DataExpression.DESCRIPTIONPARAM, jsonDescription);
        }
    }

    //author: current user name, with fallback if not available
    String userName = System.getProperty("user.name");
    if (StringUtils.isNotEmpty(userName)) {
        mtd.put(DataExpression.AUTHORPARAM, userName);
    } else {
        mtd.put(DataExpression.AUTHORPARAM, "SystemML");
    }

    //creation timestamp
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z");
    mtd.put(DataExpression.CREATEDPARAM, sdf.format(new Date()));

    // indent with 4 spaces
    return mtd.toString(4);
}
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class FrameReadWriteTest method runFrameReadWriteTest.
/**
 * Generates two random frames and hands them to {@code writeAndVerifyData}
 * together with the given output format and CSV format properties.
 *
 * <p>The parallel read/write flags for text and binary formats are set to
 * {@code parallel} for the duration of the test and restored to their
 * previous values afterwards, each time followed by installing a fresh
 * global {@code CompilerConfig}.
 *
 * @param oinfo output format passed to {@code writeAndVerifyData}
 * @param schema1 value types of the first frame; its length is the column count of the first data set
 * @param schema2 value types of the second frame; its length is the column count of the second data set
 * @param parallel value assigned to both parallel read/write flags
 */
private void runFrameReadWriteTest(OutputInfo oinfo, ValueType[] schema1, ValueType[] schema2, boolean parallel) {
    //remember current flag values for restore
    final boolean savedParText = CompilerConfig.FLAG_PARREADWRITE_TEXT;
    final boolean savedParBinary = CompilerConfig.FLAG_PARREADWRITE_BINARY;

    try {
        //apply requested parallel setting
        CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
        CompilerConfig.FLAG_PARREADWRITE_BINARY = parallel;
        ConfigurationManager.setGlobalConfig(new CompilerConfig());

        //generate raw data for both frames
        double[][] data1 = getRandomMatrix(rows, schema1.length, -10, 10, 0.9, 2373);
        double[][] data2 = getRandomMatrix(rows, schema2.length, -10, 10, 0.9, 129);

        //populate first frame
        FrameBlock fb1 = new FrameBlock(schema1);
        initFrameData(fb1, data1, schema1);

        //populate second frame
        FrameBlock fb2 = new FrameBlock(schema2);
        initFrameData(fb2, data2, schema2);

        //write frames to disk and verify
        CSVFileFormatProperties csvProps = new CSVFileFormatProperties();
        csvProps.setDelim(DELIMITER);
        csvProps.setHeader(HEADER);
        writeAndVerifyData(oinfo, fb1, fb2, csvProps);
    }
    catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
    finally {
        //restore previous flag values
        CompilerConfig.FLAG_PARREADWRITE_TEXT = savedParText;
        CompilerConfig.FLAG_PARREADWRITE_BINARY = savedParBinary;
        ConfigurationManager.setGlobalConfig(new CompilerConfig());
    }
}
Aggregations