Use of org.apache.nutch.scoring.webgraph.Node in the Apache Nutch project.
Class NodeReader, method read.
/**
 * Reads every record of the sequence file at {@code path} and converts each
 * key/{@link Node} pair into a row via {@code getNodeRow}.
 *
 * <p>A record whose conversion throws is skipped (best effort) and the
 * failure is logged; the remaining records are still returned.
 *
 * @param path location of the sequence file to read
 * @return the rows that were successfully converted, in file order
 * @throws FileNotFoundException if the file cannot be found
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public List read(String path) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources guarantees the reader is closed even when reading fails.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        while (reader.next(key, value)) {
            try {
                rows.add(getNodeRow(key, value));
            } catch (Exception e) {
                // Deliberately best effort: one bad record must not abort the whole read,
                // but it should not disappear silently either.
                LOG.warn("Skipping record that could not be converted in file {} : {}", file,
                    StringUtils.stringifyException(e));
            }
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file,
            StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return rows;
}
Use of org.apache.nutch.scoring.webgraph.Node in the Apache Nutch project.
Class NodeReader, method slice.
/**
 * Reads a window of records from the sequence file at {@code path} and
 * converts each key/{@link Node} pair into a row via {@code getNodeRow}.
 *
 * <p>The first {@code start} records are skipped; records are then collected
 * until the zero-based record position reaches {@code end} or the file is
 * exhausted, i.e. positions {@code start} (inclusive) to {@code end}
 * (exclusive).
 *
 * @param path  location of the sequence file to read
 * @param start number of leading records to skip
 * @param end   record position at which to stop (exclusive)
 * @return the converted rows of the requested window, in file order
 * @throws FileNotFoundException if the file cannot be found
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public List slice(String path, int start, int end) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources guarantees the reader is closed even when reading fails.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        int i = 0;
        // Advance past the records that precede the requested window.
        while (i < start && reader.next(key, value)) {
            i++;
        }
        // Check the bound first so no record is read once the window is full.
        while (i < end && reader.next(key, value)) {
            rows.add(getNodeRow(key, value));
            i++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file,
            StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return rows;
}
Use of org.apache.nutch.scoring.webgraph.Node in the Apache Nutch project.
Class NodeReader, method head.
/**
 * Reads at most the first {@code nrows} records of the sequence file at
 * {@code path} and converts each key/{@link Node} pair into a row via
 * {@code getNodeRow}.
 *
 * @param path  location of the sequence file to read
 * @param nrows maximum number of rows to return; no record is read when this
 *              is zero or negative
 * @return the converted rows, in file order
 * @throws FileNotFoundException if the file cannot be found
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public List head(String path, int nrows) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources guarantees the reader is closed even when reading fails.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        int i = 0;
        // Check the bound first so no record is read beyond the requested count.
        while (i < nrows && reader.next(key, value)) {
            rows.add(getNodeRow(key, value));
            i++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file,
            StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return rows;
}
Use of org.apache.nutch.scoring.webgraph.Node in the Apache Nutch project.
Class NodeReader, method count.
/**
 * Counts the records in the sequence file at {@code path} by reading it from
 * start to end.
 *
 * @param path location of the sequence file to read
 * @return the number of records read from the file
 * @throws FileNotFoundException if the file cannot be found
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public int count(String path) throws FileNotFoundException {
    Path file = new Path(path);
    int i = 0;
    // try-with-resources guarantees the reader is closed even when reading fails.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        while (reader.next(key, value)) {
            i++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file,
            StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return i;
}
Aggregations