Search in sources :

Example 1 with Node

use of org.apache.nutch.scoring.webgraph.Node in project nutch by apache.

the class NodeReader method read.

/**
 * Reads every record of the sequence file at {@code path} and converts each
 * key/{@link Node} pair into a row using {@code getNodeRow}.
 *
 * <p>Records whose conversion throws are skipped (best effort) and logged at
 * warn level instead of being dropped silently.
 *
 * @param path location of the sequence file to read
 * @return the rows that could be converted, in file order
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public List read(String path) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources closes the reader even when reading fails part-way.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        while (reader.next(key, value)) {
            try {
                HashMap<String, String> row = getNodeRow(key, value);
                rows.add(row);
            } catch (Exception e) {
                // Keep the original best-effort behaviour (skip the record),
                // but leave a trace so bad rows can be diagnosed.
                LOG.warn("Skipping record that could not be converted in file {} : {}", file,
                    StringUtils.stringifyException(e));
            }
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) Node(org.apache.nutch.scoring.webgraph.Node) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)

Example 2 with Node

use of org.apache.nutch.scoring.webgraph.Node in project nutch by apache.

the class NodeReader method slice.

/**
 * Returns the rows of the sequence file at {@code path} whose zero-based
 * record index lies in {@code [start, end)}, converted with
 * {@code getNodeRow}.
 *
 * @param path location of the sequence file to read
 * @param start index of the first record to return (inclusive)
 * @param end index at which to stop (exclusive)
 * @return the converted rows in file order; empty if the range selects nothing
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public List slice(String path, int start, int end) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources closes the reader even when reading fails part-way.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        int i = 0;
        // Skip records until the start position (or the end of the file).
        while (i < start && reader.next(key, value)) {
            i++;
        }
        // Test the bound first so no record is read past the requested window.
        while (i < end && reader.next(key, value)) {
            HashMap<String, String> row = getNodeRow(key, value);
            rows.add(row);
            i++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) Node(org.apache.nutch.scoring.webgraph.Node) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)

Example 3 with Node

use of org.apache.nutch.scoring.webgraph.Node in project nutch by apache.

the class NodeReader method head.

/**
 * Returns at most the first {@code nrows} records of the sequence file at
 * {@code path}, converted with {@code getNodeRow}.
 *
 * @param path location of the sequence file to read
 * @param nrows maximum number of rows to return
 * @return the converted rows in file order; fewer than {@code nrows} if the
 *         file ends first
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public List head(String path, int nrows) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources closes the reader even when reading fails part-way.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        int i = 0;
        // Test the bound first so no record is read once nrows is reached.
        while (i < nrows && reader.next(key, value)) {
            HashMap<String, String> row = getNodeRow(key, value);
            rows.add(row);
            i++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) Node(org.apache.nutch.scoring.webgraph.Node) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)

Example 4 with Node

use of org.apache.nutch.scoring.webgraph.Node in project nutch by apache.

the class NodeReader method count.

/**
 * Counts the records in the sequence file at {@code path} by reading it from
 * start to end.
 *
 * @param path location of the sequence file to read
 * @return the number of records read
 * @throws FileNotFoundException if the file does not exist
 * @throws WebApplicationException if any other I/O error occurs while reading
 */
@Override
public int count(String path) throws FileNotFoundException {
    Path file = new Path(path);
    int total = 0;
    // try-with-resources closes the reader even when reading fails part-way.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        while (reader.next(key, value)) {
            total++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
        throw new WebApplicationException(e);
    }
    return total;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) SequenceFile(org.apache.hadoop.io.SequenceFile) WebApplicationException(javax.ws.rs.WebApplicationException) Node(org.apache.nutch.scoring.webgraph.Node) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)

Aggregations

FileNotFoundException (java.io.FileNotFoundException)4 IOException (java.io.IOException)4 WebApplicationException (javax.ws.rs.WebApplicationException)4 Path (org.apache.hadoop.fs.Path)4 SequenceFile (org.apache.hadoop.io.SequenceFile)4 Reader (org.apache.hadoop.io.SequenceFile.Reader)4 Writable (org.apache.hadoop.io.Writable)4 Node (org.apache.nutch.scoring.webgraph.Node)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3