Use of org.apache.hadoop.io.SequenceFile.Reader in the Apache Nutch project:
the read method of the SequenceReader class.
/**
 * Reads every key/value pair from the sequence file at {@code path} and
 * returns them as rows of {@code [key.toString(), value.toString()]}.
 *
 * @param path filesystem path of the sequence file to read
 * @return one two-element row per record, in file order
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException wrapping any other I/O failure
 */
@Override
public List<List<String>> read(String path) throws FileNotFoundException {
  List<List<String>> rows = new ArrayList<>();
  Path file = new Path(path);
  // try-with-resources guarantees the reader is closed even when an
  // exception interrupts the read loop (previously it leaked on failure).
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
    // Key/value classes are recorded in the file header; instantiate them
    // reflectively so this reader works for any Writable pair.
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    while (reader.next(key, value)) {
      List<String> row = new ArrayList<>();
      row.add(key.toString());
      row.add(value.toString());
      rows.add(row);
    }
  } catch (FileNotFoundException fne) {
    // Preserve context instead of rethrowing a blank exception.
    throw new FileNotFoundException("File " + file + " not found: " + fne.getMessage());
  } catch (IOException e) {
    LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
    // Keep the cause so the container can report the underlying failure.
    throw new WebApplicationException(e);
  }
  return rows;
}
Use of org.apache.hadoop.io.SequenceFile.Reader in the Apache Nutch project:
the slice method of the NodeReader class.
/**
 * Returns the records of the sequence file at {@code path} whose positions
 * fall in the half-open range [{@code start}, {@code end}).
 *
 * @param path filesystem path of the Node sequence file
 * @param start index of the first record to return (0-based, inclusive)
 * @param end index at which to stop (exclusive)
 * @return one map per record, produced by {@code getNodeRow}
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException wrapping any other I/O failure
 */
@Override
public List slice(String path, int start, int end) throws FileNotFoundException {
  List<HashMap> rows = new ArrayList<>();
  Path file = new Path(path);
  // try-with-resources closes the reader even on exception (previously leaked).
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Node value = new Node();
    int i = 0;
    // Skip records before the requested start position.
    for (; i < start && reader.next(key, value); i++) {
    }
    // Test the bound BEFORE reading so we do not consume a record past `end`
    // (the original evaluated reader.next() first and read one extra record).
    while (i < end && reader.next(key, value)) {
      HashMap<String, String> t_row = getNodeRow(key, value);
      rows.add(t_row);
      i++;
    }
  } catch (FileNotFoundException fne) {
    // Preserve context instead of rethrowing a blank exception.
    throw new FileNotFoundException("File " + file + " not found: " + fne.getMessage());
  } catch (IOException e) {
    LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
    // Keep the cause so the container can report the underlying failure.
    throw new WebApplicationException(e);
  }
  return rows;
}
Use of org.apache.hadoop.io.SequenceFile.Reader in the Apache Nutch project:
the head method of the NodeReader class.
/**
 * Returns the first {@code nrows} records of the sequence file at
 * {@code path}.
 *
 * @param path filesystem path of the Node sequence file
 * @param nrows maximum number of records to return
 * @return up to {@code nrows} maps, one per record, in file order
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException wrapping any other I/O failure
 */
@Override
public List head(String path, int nrows) throws FileNotFoundException {
  List<HashMap> rows = new ArrayList<>();
  Path file = new Path(path);
  // try-with-resources closes the reader even on exception (previously leaked).
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Node value = new Node();
    int i = 0;
    // Test the bound BEFORE reading so we do not consume a record past
    // `nrows` (the original evaluated reader.next() first, reading one extra).
    while (i < nrows && reader.next(key, value)) {
      HashMap<String, String> t_row = getNodeRow(key, value);
      rows.add(t_row);
      i++;
    }
  } catch (FileNotFoundException fne) {
    // Preserve context instead of rethrowing a blank exception.
    throw new FileNotFoundException("File " + file + " not found: " + fne.getMessage());
  } catch (IOException e) {
    LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
    // Keep the cause so the container can report the underlying failure.
    throw new WebApplicationException(e);
  }
  return rows;
}
Use of org.apache.hadoop.io.SequenceFile.Reader in the Apache Nutch project:
the count method of the NodeReader class.
/**
 * Counts the records in the sequence file at {@code path}.
 *
 * @param path filesystem path of the Node sequence file
 * @return the total number of key/value records in the file
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException wrapping any other I/O failure
 */
@Override
public int count(String path) throws FileNotFoundException {
  Path file = new Path(path);
  int i = 0;
  // try-with-resources closes the reader even on exception (previously leaked).
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Node value = new Node();
    while (reader.next(key, value)) {
      i++;
    }
  } catch (FileNotFoundException fne) {
    // Preserve context instead of rethrowing a blank exception.
    throw new FileNotFoundException("File " + file + " not found: " + fne.getMessage());
  } catch (IOException e) {
    LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
    // Keep the cause so the container can report the underlying failure.
    throw new WebApplicationException(e);
  }
  return i;
}
Use of org.apache.hadoop.io.SequenceFile.Reader in the Apache SystemML (incubator-systemml) project:
the initOffsetsReader method of the TfUtils class.
/**
 * Opens a sequence-file reader over the single row-offsets file written by
 * CSVReblockMR.
 *
 * @param job job configuration holding the offsets file location under
 *        {@code CSVReblockMR.ROWID_FILE_NAME}
 * @return an open {@link Reader}; the caller is responsible for closing it
 * @throws IOException if the path does not resolve to exactly one file or
 *         the reader cannot be opened
 */
private Reader initOffsetsReader(JobConf job) throws IOException {
  Path path = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
  FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
  Path[] files = MatrixReader.getSequenceFilePaths(fs, path);
  if (files.length != 1)
    throw new IOException("Expecting a single file under counters file: " + path.toString());
  // Use the options-based constructor; Reader(FileSystem, Path, Configuration)
  // is deprecated in current Hadoop releases.
  return new SequenceFile.Reader(job, Reader.file(files[0]));
}
Aggregations