Use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by Apache.
Class FrameReaderTextCSV, method readFrameFromInputStream.
/**
 * Reads a CSV frame from an input stream into a newly allocated frame block.
 *
 * @param is input stream providing the CSV data
 * @param schema column value types as given by the caller; normalized via createOutputSchema
 * @param names column names as given by the caller; normalized via createOutputNames
 * @param rlen number of rows used to allocate the output block
 * @param clen number of columns used to derive the output schema and names
 * @return the frame block filled by the core CSV read
 * @throws IOException if reading fails
 * @throws DMLRuntimeException if a helper reports a runtime error
 */
@Override
public FrameBlock readFrameFromInputStream(InputStream is, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // allocate output frame block
    ValueType[] lschema = createOutputSchema(schema, clen);
    String[] lnames = createOutputNames(names, clen);
    FrameBlock ret = createOutputFrameBlock(lschema, lnames, rlen);
    // core read: wrap the stream in an input format and read its first split
    InputStreamInputFormat informat = new InputStreamInputFormat(is);
    InputSplit split = informat.getSplits(null, 1)[0];
    // Pass the schema/names the block was allocated with (as readFrameFromHDFS does),
    // so the reader indexes the same schema as the destination block.
    // NOTE(review): trailing args 0/true presumably mean "row offset 0, first split" - confirm.
    readCSVFrameFromInputSplit(split, informat, null, ret, lschema, lnames, rlen, clen, 0, true);
    return ret;
}
Use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by Apache.
Class FrameReaderTextCSV, method readFrameFromHDFS.
/**
 * Reads a CSV frame from the given file into a newly allocated frame block.
 * If either dimension is non-positive, both dimensions are taken from computeCSVSize.
 *
 * @param fname path of the CSV input
 * @param schema column value types; normalized via createOutputSchema
 * @param names column names; normalized via createOutputNames
 * @param rlen number of rows, or a non-positive value if unknown
 * @param clen number of columns, or a non-positive value if unknown
 * @return the frame block filled by the core CSV read
 * @throws IOException if file access or reading fails
 * @throws DMLRuntimeException if a helper reports a runtime error
 */
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // set up job configuration, input path and file system handle
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fname);
    final FileSystem fileSys = IOUtilFunctions.getFileSystem(inputPath, conf);
    FileInputFormat.addInputPath(conf, inputPath);

    // the input must exist and be non-empty before anything else is attempted
    checkValidInputFile(fileSys, inputPath);

    // keep the caller's dimensions unless at least one of them is unknown
    long numRows = rlen;
    long numCols = clen;
    final boolean dimsKnown = numRows > 0 && numCols > 0;
    if (!dimsKnown) {
        final Pair<Integer, Integer> dims = computeCSVSize(inputPath, conf, fileSys);
        numRows = dims.getKey();
        numCols = dims.getValue();
    }

    // allocate the result with the normalized schema and names
    final ValueType[] outSchema = createOutputSchema(schema, numCols);
    final String[] outNames = createOutputNames(names, numCols);
    final FrameBlock result = createOutputFrameBlock(outSchema, outNames, numRows);

    // core read (sequential/parallel)
    readCSVFrameFromHDFS(inputPath, conf, fileSys, result, outSchema, outNames, numRows, numCols);
    return result;
}
Use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by Apache.
Class FrameReaderTextCell, method readFrameFromHDFS.
/**
 * Reads a text-cell frame from the given file into a newly allocated frame block.
 *
 * @param fname path of the text-cell input
 * @param schema column value types; normalized via createOutputSchema
 * @param names column names; normalized via createOutputNames
 * @param rlen number of rows used to allocate the output block
 * @param clen number of columns used to derive the output schema and names
 * @return the frame block filled by the core text-cell read
 * @throws IOException if file access or reading fails
 * @throws DMLRuntimeException if a helper reports a runtime error
 */
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // the result block is allocated first, from the normalized schema and names
    final ValueType[] outSchema = createOutputSchema(schema, clen);
    final String[] outNames = createOutputNames(names, clen);
    final FrameBlock result = createOutputFrameBlock(outSchema, outNames, rlen);

    // set up job configuration, input path and file system handle
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fname);
    final FileSystem fileSys = IOUtilFunctions.getFileSystem(inputPath, conf);

    // the input must exist and be non-empty
    checkValidInputFile(fileSys, inputPath);

    // core read (sequential/parallel)
    readTextCellFrameFromHDFS(inputPath, conf, fileSys, result, outSchema, outNames, rlen, clen);
    return result;
}
Use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by Apache.
Class FrameReaderTextCell, method readTextCellFrameFromInputSplit.
/**
 * Reads every record of one input split into the destination frame.
 * Each record is tokenized on ' ' as: row index, column index, value, with
 * 1-based indices in the file. Two row indices are treated as column metadata
 * rather than cells: file row -2 sets the column's missing-value string and
 * file row -1 sets the column's number of distinct values.
 *
 * @param split input split to read
 * @param informat text input format used to create the record reader
 * @param job job configuration handed to the record reader
 * @param dest destination frame; its schema and dimensions drive parsing and bounds checks
 * @throws IOException if a cell lies outside the frame or a record cannot be read/parsed;
 *         the original exception is always attached as the cause
 */
protected static void readTextCellFrameFromInputSplit(InputSplit split, TextInputFormat informat, JobConf job, FrameBlock dest) throws IOException {
    ValueType[] schema = dest.getSchema();
    int rlen = dest.getNumRows();
    int clen = dest.getNumColumns();
    // create record reader
    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
    LongWritable key = new LongWritable();
    Text value = new Text();
    FastStringTokenizer st = new FastStringTokenizer(' ');
    // last parsed 0-based indices; kept outside the loop for post-mortem error reporting
    int row = -1;
    int col = -1;
    try {
        while (reader.next(key, value)) {
            // reinit tokenizer
            st.reset(value.toString());
            row = st.nextInt() - 1;
            col = st.nextInt() - 1;
            if (row == -3) {
                // metadata record: missing-value string of the column
                dest.getColumnMetadata(col).setMvValue(st.nextToken());
            } else if (row == -2) {
                // metadata record: number of distinct values of the column
                dest.getColumnMetadata(col).setNumDistinct(st.nextLong());
            } else {
                dest.set(row, col, UtilFunctions.stringToObject(schema[col], st.nextToken()));
            }
        }
    } catch (Exception ex) {
        // post-mortem error handling and bounds checking
        // Metadata rows are legitimately negative, so only the column is range-checked for them;
        // otherwise a parse failure in a metadata record would be misreported as out of range.
        boolean metadataRow = (row == -3 || row == -2);
        boolean rowOutOfRange = !metadataRow && (row < 0 || row + 1 > rlen);
        boolean colOutOfRange = col < 0 || col + 1 > clen;
        if (rowOutOfRange || colOutOfRange) {
            // keep the cause so the underlying failure is not lost
            throw new IOException("Frame cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "].", ex);
        }
        throw new IOException("Unable to read frame in text cell format.", ex);
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
}
Use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by Apache.
Class FrameReaderTextCell, method readFrameFromInputStream.
/**
 * Reads a text-cell frame from an input stream into a newly allocated frame block.
 *
 * @param is input stream providing the text-cell data
 * @param schema column value types; normalized via createOutputSchema
 * @param names column names; normalized via createOutputNames
 * @param rlen number of rows used to allocate the output block
 * @param clen number of columns used to derive the output schema and names
 * @return the frame block filled by the raw text-cell read
 * @throws IOException if reading fails
 * @throws DMLRuntimeException if a helper reports a runtime error
 */
@Override
public final FrameBlock readFrameFromInputStream(InputStream is, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // normalize schema and names, then allocate the result block from them
    final ValueType[] outSchema = createOutputSchema(schema, clen);
    final String[] outNames = createOutputNames(names, clen);
    final FrameBlock result = createOutputFrameBlock(outSchema, outNames, rlen);

    // fill the block straight from the stream
    readRawTextCellFrameFromInputStream(is, result, outSchema, outNames, rlen, clen);
    return result;
}
Aggregations