use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.
the class ResultMergeLocalFile method createTextCellStagingFile.
/**
 * Copies every cell of the text-cell file behind {@code mo} into the staging area.
 * <p>
 * Each input line is tokenized on single blanks as {@code row col value}. Parsed cells are
 * collected in memory and handed to {@code appendCellBufferToStagingArea} whenever the
 * buffer holds more than {@code StagingFileUtils.CELL_BUFFER_SIZE} entries, and once more
 * after the last record of every input split.
 *
 * @param fnameStaging staging location, passed through to {@code appendCellBufferToStagingArea}
 * @param mo matrix object supplying the input file name and the rows/columns per block
 * @param ID identifier, passed through to {@code appendCellBufferToStagingArea}
 * @throws IOException if the input format or record reader fails
 * @throws DMLRuntimeException declared for the staging helper (its body is not visible here)
 */
private static void createTextCellStagingFile(String fnameStaging, MatrixObject mo, long ID) throws IOException, DMLRuntimeException {
    // set up a text input format over the matrix file
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    FileInputFormat.addInputPath(conf, new Path(mo.getFileName()));
    final TextInputFormat textFormat = new TextInputFormat();
    textFormat.configure(conf);
    final InputSplit[] inputSplits = textFormat.getSplits(conf, 1);

    // objects reused for every record
    final LinkedList<Cell> pending = new LinkedList<>();
    final LongWritable offset = new LongWritable();
    final Text line = new Text();

    final MatrixCharacteristics dims = mo.getMatrixCharacteristics();
    final int rowsPerBlock = dims.getRowsPerBlock();
    final int colsPerBlock = dims.getColsPerBlock();

    // FIXME: the row/col handling below needs reconsideration whenever textcell is used actively.
    // NOTE MB: long row/col indexes reproducibly led to JIT compilation errors at runtime
    // (seen on Windows, Intel x86-64, IBM JDK 64bit/32bit). int indexes work, but long is
    // required for larger matrices. Since textcell is never used for result merge
    // (hybrid/hadoop: binaryblock, singlenode: binarycell), the proposed workaround is to
    // exclude this method from JIT via -Xjit:exclude={package.method*}(count=0,optLevel=0).
    final FastStringTokenizer tokens = new FastStringTokenizer(' ');

    for (int i = 0; i < inputSplits.length; i++) {
        final RecordReader<LongWritable, Text> records = textFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
        try {
            while (records.next(offset, line)) {
                // point the shared tokenizer at the current line
                tokens.reset(line.toString());
                final long r = tokens.nextLong();
                final long c = tokens.nextLong();
                final double v = Double.parseDouble(tokens.nextToken());
                pending.addLast(new Cell(r, c, v));

                // keep collecting until the buffer exceeds its limit
                if (pending.size() <= StagingFileUtils.CELL_BUFFER_SIZE) {
                    continue;
                }
                // periodic flush
                appendCellBufferToStagingArea(fnameStaging, ID, pending, rowsPerBlock, colsPerBlock);
                pending.clear();
            }

            // flush whatever is left once the split is exhausted
            if (!pending.isEmpty()) {
                appendCellBufferToStagingArea(fnameStaging, ID, pending, rowsPerBlock, colsPerBlock);
                pending.clear();
            }
        } finally {
            IOUtilFunctions.closeSilently(records);
        }
    }
}
use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.
the class StagingFileUtils method readCellListFromLocal.
/**
 * Loads a local text-cell file completely into memory.
 * <p>
 * Every line is tokenized on single blanks as {@code row col value} and turned into one
 * {@code Cell}; the cells are returned in file order.
 *
 * @param fname name of the local file to read
 * @return list with one cell per input line (empty if the file has no lines)
 * @throws IOException if the file cannot be opened or read
 */
public static LinkedList<Cell> readCellListFromLocal(String fname) throws IOException {
    final FileInputStream rawIn = new FileInputStream(fname);
    final BufferedReader lines = new BufferedReader(new InputStreamReader(rawIn));
    final LinkedList<Cell> cells = new LinkedList<>();
    try {
        // one tokenizer instance, re-pointed at each line
        final FastStringTokenizer tokens = new FastStringTokenizer(' ');
        for (String line = lines.readLine(); line != null; line = lines.readLine()) {
            tokens.reset(line);
            final long r = tokens.nextLong();
            final long c = tokens.nextLong();
            final double v = tokens.nextDouble();
            cells.addLast(new Cell(r, c, v));
        }
    } finally {
        // closing the buffered reader is delegated to the project's silent-close helper
        IOUtilFunctions.closeSilently(lines);
    }
    return cells;
}
use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.
the class DataPartitionerLocal method partitionTextCell.
/**
 * Partitions a text-cell matrix by way of a local staging area.
 * <p>
 * STEP 1 reads {@code fname} line by line (tokens {@code row col value}, separated by single
 * blanks) and appends the cells to the staging area in batches of more than
 * {@code StagingFileUtils.CELL_BUFFER_SIZE} cells, plus a final flush per input split.
 * STEP 2 lists the entries of the staging directory and writes each of them to
 * {@code fnameNew}; when {@code PARALLEL} is set, the entries are divided into contiguous
 * ranges handled by at most {@code _par} worker threads, otherwise they are written
 * sequentially.
 * <p>
 * Any failure is rethrown as {@link DMLRuntimeException} with the original exception
 * attached as cause. If the last parsed cell position lies outside
 * {@code [1:rlen, 1:clen]}, the message reports that position.
 *
 * @param fname input file name
 * @param fnameStaging local staging directory
 * @param fnameNew output file name
 * @param rlen overall number of rows, used for the post-mortem bounds check
 * @param clen overall number of columns, used for the post-mortem bounds check
 * @param brlen passed to the staging helper (presumably rows per block; confirm against helper)
 * @param bclen passed to the staging helper (presumably columns per block; confirm against helper)
 */
private void partitionTextCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    // position of the last parsed cell; declared outside the try for the post-mortem check
    long row = -1;
    long col = -1;
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);
        LinkedList<Cell> buffer = new LinkedList<>();
        LongWritable key = new LongWritable();
        Text value = new Text();
        FastStringTokenizer st = new FastStringTokenizer(' ');
        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value)) {
                    // reset tokenizer
                    st.reset(value.toString());
                    row = st.nextLong();
                    col = st.nextLong();
                    double lvalue = st.nextDouble();
                    Cell tmp = new Cell(row, col, lvalue);
                    buffer.addLast(tmp);
                    // periodic flush
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }
                // final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }

        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            // entries per worker; loop-invariant, so computed once
            // (for len == 0 this evaluates to 0 and the loops below do not run)
            int chunk = (int) Math.ceil(((double) fnamesPartitions.length) / len);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * chunk;
                int end = Math.min((i + 1) * chunk - 1, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerTextCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) {
                t.join();
            }
        } else {
            for (String pdir : fnamesPartitions) {
                writeTextCellFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
            }
        }
    } catch (Exception e) {
        // an interrupt caught here (e.g. from join) must not be lost for the caller
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        // post-mortem error handling and bounds checking; the cause is always preserved
        // because row/col also fail this check when no cell was parsed at all
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", e);
        } else {
            throw new DMLRuntimeException("Unable to partition text cell matrix.", e);
        }
    }
}
use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.
the class FrameReaderTextCell method readTextCellFrameFromInputSplit.
/**
 * Reads one input split of a text-cell frame into {@code dest}.
 * <p>
 * Every record is tokenized on single blanks as {@code row col value} with 1-based
 * indexes, which are converted to 0-based on read. Two converted row values are treated
 * as column metadata rather than data: {@code -3} stores the value via {@code setMvValue}
 * and {@code -2} stores it via {@code setNumDistinct}. All other rows are converted
 * according to the column's schema type and written into the frame.
 * <p>
 * Any failure is rethrown as {@link IOException} with the original exception attached as
 * cause. If the last parsed position lies outside the frame, the message reports that
 * position.
 *
 * @param split input split to read
 * @param informat text input format used to create the record reader
 * @param job job configuration for the record reader
 * @param dest target frame; also supplies schema and dimensions
 * @throws IOException if reading or parsing fails, or a cell is out of range
 */
protected static void readTextCellFrameFromInputSplit(InputSplit split, TextInputFormat informat, JobConf job, FrameBlock dest) throws IOException {
    ValueType[] schema = dest.getSchema();
    int rlen = dest.getNumRows();
    int clen = dest.getNumColumns();
    // create record reader
    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
    LongWritable key = new LongWritable();
    Text value = new Text();
    FastStringTokenizer st = new FastStringTokenizer(' ');
    // last parsed 0-based position; declared outside the try for the post-mortem check
    int row = -1;
    int col = -1;
    try {
        while (reader.next(key, value)) {
            // reinit tokenizer
            st.reset(value.toString());
            row = st.nextInt() - 1;
            col = st.nextInt() - 1;
            if (row == -3) {
                dest.getColumnMetadata(col).setMvValue(st.nextToken());
            } else if (row == -2) {
                dest.getColumnMetadata(col).setNumDistinct(st.nextLong());
            } else {
                dest.set(row, col, UtilFunctions.stringToObject(schema[col], st.nextToken()));
            }
        }
    } catch (Exception ex) {
        // post-mortem error handling and bounds checking; keep the cause in both branches
        // so the underlying failure is never hidden behind the range message
        if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen) {
            throw new IOException("Frame cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "].", ex);
        } else {
            throw new IOException("Unable to read frame in text cell format.", ex);
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
}
use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.
the class FrameReaderTextCell method readRawTextCellFrameFromInputStream.
/**
 * Fills {@code dest} from a raw text-cell stream.
 * <p>
 * Lines are tokenized on single blanks as {@code row col value} with 1-based indexes that
 * are shifted to 0-based. After the shift, row {@code -3} carries a column's value for
 * {@code setMvValue}, row {@code -2} carries its value for {@code setNumDistinct}, and
 * every other row is a data cell converted according to {@code schema[col]}.
 * <p>
 * Failures are rethrown as {@link IOException} with the original exception as cause. The
 * message names the offending cell when the last parsed position is outside
 * {@code [1:rlen, 1:clen]}.
 *
 * @param is stream to read; wrapped in a buffered reader that is closed on exit
 * @param dest target frame
 * @param schema value types per column, used to convert data cells
 * @param names column names (not read by this method)
 * @param rlen overall number of rows, used for the post-mortem bounds check
 * @param clen overall number of columns, used for the post-mortem bounds check
 * @throws IOException if reading or parsing fails, or a cell is out of range
 */
protected static void readRawTextCellFrameFromInputStream(InputStream is, FrameBlock dest, ValueType[] schema, String[] names, long rlen, long clen) throws IOException {
    // NOTE(review): the reader decodes with the platform default charset - confirm this is intended
    final BufferedReader lines = new BufferedReader(new InputStreamReader(is));
    final FastStringTokenizer tokens = new FastStringTokenizer(' ');
    // last parsed 0-based position, kept for the error path
    int row = -1;
    int col = -1;
    try {
        for (String line = lines.readLine(); line != null; line = lines.readLine()) {
            // point the shared tokenizer at the current line
            tokens.reset(line);
            row = tokens.nextInt() - 1;
            col = tokens.nextInt() - 1;
            switch (row) {
                case -3:
                    dest.getColumnMetadata(col).setMvValue(tokens.nextToken());
                    break;
                case -2:
                    dest.getColumnMetadata(col).setNumDistinct(tokens.nextLong());
                    break;
                default:
                    dest.set(row, col, UtilFunctions.stringToObject(schema[col], tokens.nextToken()));
                    break;
            }
        }
    } catch (Exception ex) {
        // post-mortem: decide which message to use based on the last parsed position
        final boolean insideFrame = row >= 0 && row + 1 <= rlen && col >= 0 && col + 1 <= clen;
        if (insideFrame) {
            throw new IOException("Unable to read frame in raw text cell format.", ex);
        }
        throw new IOException("Frame cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "].", ex);
    } finally {
        IOUtilFunctions.closeSilently(lines);
    }
}
Aggregations