Search in sources :

Example 1 with LinkDatum

use of org.apache.nutch.scoring.webgraph.LinkDatum in project nutch by apache.

the class LinkReader method read.

/**
 * Reads every record of the sequence file at {@code path} and converts each
 * key/{@link LinkDatum} pair into a row via {@code getLinksRow}.
 *
 * <p>Row conversion is best effort: a record whose conversion throws is
 * logged and skipped so that one bad record does not abort the whole read.
 *
 * @param path location of the sequence file to read
 * @return the converted rows, in the order they were read
 * @throws FileNotFoundException if the file is reported missing while it is
 *         opened or read
 * @throws WebApplicationException if any other I/O error occurs
 */
@Override
public List read(String path) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources closes the reader even when next() or the row
    // conversion fails part-way through the file.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        LinkDatum value = new LinkDatum();
        while (reader.next(key, value)) {
            try {
                HashMap<String, String> row = getLinksRow(key, value);
                rows.add(row);
            } catch (Exception rowError) {
                // Deliberately non-fatal, but no longer silent.
                LOG.warn("Skipping unreadable row in file {} : ", file, rowError);
            }
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        // Passing the throwable as the last argument makes the logger print
        // the stack trace; the message has only one placeholder.
        LOG.error("Error occurred while reading file {} : ", file, e);
        throw new WebApplicationException(e);
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) LinkDatum(org.apache.nutch.scoring.webgraph.LinkDatum) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)

Example 2 with LinkDatum

use of org.apache.nutch.scoring.webgraph.LinkDatum in project nutch by apache.

the class LinkReader method head.

/**
 * Reads at most the first {@code nrows} records of the sequence file at
 * {@code path} and converts each key/{@link LinkDatum} pair into a row via
 * {@code getLinksRow}.
 *
 * @param path  location of the sequence file to read
 * @param nrows maximum number of rows to return; a value of zero or less
 *              yields an empty list
 * @return the converted rows, in the order they were read
 * @throws FileNotFoundException if the file is reported missing while it is
 *         opened or read
 * @throws WebApplicationException if any other I/O error occurs
 */
@Override
public List head(String path, int nrows) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources closes the reader even when next() or the row
    // conversion fails part-way through the file.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        LinkDatum value = new LinkDatum();
        int count = 0;
        // Check the limit first so no record is deserialised only to be
        // thrown away once the limit has been reached.
        while (count < nrows && reader.next(key, value)) {
            HashMap<String, String> row = getLinksRow(key, value);
            rows.add(row);
            count++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        // Passing the throwable as the last argument makes the logger print
        // the stack trace; the message has only one placeholder.
        LOG.error("Error occurred while reading file {} : ", file, e);
        throw new WebApplicationException(e);
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) LinkDatum(org.apache.nutch.scoring.webgraph.LinkDatum) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)

Example 3 with LinkDatum

use of org.apache.nutch.scoring.webgraph.LinkDatum in project nutch by apache.

the class LinkReader method slice.

/**
 * Reads the records of the sequence file at {@code path} whose zero-based
 * position lies in {@code [start, end)} and converts each
 * key/{@link LinkDatum} pair into a row via {@code getLinksRow}.
 *
 * <p>If the file holds fewer than {@code start} records, or {@code end} is
 * not greater than {@code start}, the result is empty.
 *
 * @param path  location of the sequence file to read
 * @param start zero-based position of the first record to return
 * @param end   zero-based position just past the last record to return
 * @return the converted rows, in the order they were read
 * @throws FileNotFoundException if the file is reported missing while it is
 *         opened or read
 * @throws WebApplicationException if any other I/O error occurs
 */
@Override
public List slice(String path, int start, int end) throws FileNotFoundException {
    List<HashMap<String, String>> rows = new ArrayList<>();
    Path file = new Path(path);
    // try-with-resources closes the reader even when next() or the row
    // conversion fails part-way through the file.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(file))) {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        LinkDatum value = new LinkDatum();
        int position = 0;
        // Consume and discard the records that precede the start position.
        while (position < start && reader.next(key, value)) {
            position++;
        }
        // Check the bound first so no record past the end of the slice is
        // deserialised only to be thrown away.
        while (position < end && reader.next(key, value)) {
            HashMap<String, String> row = getLinksRow(key, value);
            rows.add(row);
            position++;
        }
    } catch (FileNotFoundException fne) {
        // Rethrow the original so its message and stack trace are preserved.
        throw fne;
    } catch (IOException e) {
        // Passing the throwable as the last argument makes the logger print
        // the stack trace; the message has only one placeholder.
        LOG.error("Error occurred while reading file {} : ", file, e);
        throw new WebApplicationException(e);
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) LinkDatum(org.apache.nutch.scoring.webgraph.LinkDatum) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)

Aggregations

FileNotFoundException (java.io.FileNotFoundException): 3, IOException (java.io.IOException): 3, ArrayList (java.util.ArrayList): 3, HashMap (java.util.HashMap): 3, WebApplicationException (javax.ws.rs.WebApplicationException): 3, Path (org.apache.hadoop.fs.Path): 3, SequenceFile (org.apache.hadoop.io.SequenceFile): 3, Reader (org.apache.hadoop.io.SequenceFile.Reader): 3, Writable (org.apache.hadoop.io.Writable): 3, LinkDatum (org.apache.nutch.scoring.webgraph.LinkDatum): 3