Example 11 with Reader

Use of org.apache.hadoop.io.SequenceFile.Reader in project nutch by apache.

Class NodeReader, method read:

@Override
public List read(String path) throws FileNotFoundException {
    List<HashMap> rows = new ArrayList<>();
    Path file = new Path(path);
    SequenceFile.Reader reader;
    try {
        reader = new SequenceFile.Reader(conf, Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Node value = new Node();
        while (reader.next(key, value)) {
            try {
                HashMap<String, String> t_row = getNodeRow(key, value);
                rows.add(t_row);
            } catch (Exception e) {
                // rows that cannot be converted to a map are skipped silently
            }
        }
        reader.close();
    } catch (FileNotFoundException fne) {
        throw new FileNotFoundException();
    } catch (IOException e) {
        e.printStackTrace();
        LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
        throw new WebApplicationException();
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) Node(org.apache.nutch.scoring.webgraph.Node) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)
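
The method above closes the reader only on the success path and silently skips rows it cannot convert. Below is a minimal standalone sketch of the same read loop, not the Nutch implementation: it assumes a hypothetical input path, uses try-with-resources so the reader is closed on every path, and instantiates both key and value reflectively from the file header.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileDump {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // hypothetical input path, not taken from the Nutch example
        Path file = new Path("/tmp/webgraphdb/nodes/part-r-00000");
        // try-with-resources closes the reader even if next() throws
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(file))) {
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        }
    }
}

Because the value class is also read from the file header, the same loop works for Node, LinkDatum, or any other Writable value.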

Example 12 with Reader

Use of org.apache.hadoop.io.SequenceFile.Reader in project kylin by apache.

Class HiveToBaseCuboidMapperPerformanceTest, method test:

@Ignore("convenient trial tool for dev")
@Test
public void test() throws IOException, InterruptedException {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper();
    Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);
    mapper.doSetup(context);
    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();
    while (reader.next(key, value)) {
        mapper.map(key, value, context);
    }
    reader.close();
}
Also used : Context(org.apache.hadoop.mapreduce.Mapper.Context) Configuration(org.apache.hadoop.conf.Configuration) Reader(org.apache.hadoop.io.SequenceFile.Reader) Writable(org.apache.hadoop.io.Writable) Text(org.apache.hadoop.io.Text) Ignore(org.junit.Ignore) Test(org.junit.Test)
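
The Kylin test above assumes srcPath already points at a SequenceFile whose values are Text rows. A minimal sketch of producing such an input with SequenceFile.createWriter follows; the output path and row contents are placeholders, not taken from the Kylin code.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class WriteSampleSequenceFile {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // hypothetical output path standing in for the test's srcPath
        Path srcPath = new Path("/tmp/hive-rows.seq");
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(srcPath),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(Text.class))) {
            // made-up delimited rows standing in for exported Hive records
            writer.append(new Text("0"), new Text("2012-01-01\tcat-1\t100"));
            writer.append(new Text("1"), new Text("2012-01-02\tcat-2\t200"));
        }
    }
}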

Example 13 with Reader

Use of org.apache.hadoop.io.SequenceFile.Reader in project geowave by locationtech.

Class GeoWaveNNIT, method readFile:

private int readFile() throws IllegalArgumentException, IOException {
    int count = 0;
    final FileSystem fs = FileSystem.get(MapReduceTestUtils.getConfiguration());
    final FileStatus[] fss = fs.listStatus(new Path(TestUtils.TEMP_DIR + File.separator + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY + "/t1/pairs"));
    for (final FileStatus ifs : fss) {
        if (ifs.isFile() && ifs.getPath().toString().matches(".*part-r-0000[0-9]")) {
            try (SequenceFile.Reader reader = new SequenceFile.Reader(MapReduceTestUtils.getConfiguration(), Reader.file(ifs.getPath()))) {
                final Text key = new Text();
                final Text val = new Text();
                while (reader.next(key, val)) {
                    count++;
                }
            }
        }
    }
    return count;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Reader(org.apache.hadoop.io.SequenceFile.Reader) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) Text(org.apache.hadoop.io.Text)
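
A minimal sketch of the same record counting, assuming a hypothetical job output directory: a glob pattern stands in for the listStatus plus regular-expression filtering used in the test, and the helper name is made up.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class PairCounter {

    // Counts key/value pairs across all reducer outputs under a job output directory.
    static int countPairs(Configuration conf, Path outputDir) throws IOException {
        FileSystem fs = outputDir.getFileSystem(conf);
        // the glob replaces the listStatus + regex filtering used in the test above
        FileStatus[] parts = fs.globStatus(new Path(outputDir, "part-r-*"));
        if (parts == null) {
            return 0; // output directory does not exist
        }
        int count = 0;
        for (FileStatus part : parts) {
            try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                    SequenceFile.Reader.file(part.getPath()))) {
                Text key = new Text();
                Text value = new Text();
                while (reader.next(key, value)) {
                    count++;
                }
            }
        }
        return count;
    }

    public static void main(String[] args) throws IOException {
        // hypothetical output directory
        System.out.println(countPairs(new Configuration(), new Path("/tmp/t1/pairs")));
    }
}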

Example 14 with Reader

Use of org.apache.hadoop.io.SequenceFile.Reader in project circus-train by ExpediaGroup.

Class CircusTrainCopyListingTest, method typical:

@Test
public void typical() throws IOException {
    File input = temp.newFolder("input");
    File inputSub2 = new File(input, "sub1/sub2");
    inputSub2.mkdirs();
    Files.asCharSink(new File(inputSub2, "data"), UTF_8).write("test1");
    File listFile = temp.newFile("listFile");
    Path pathToListFile = new Path(listFile.toURI());
    List<Path> sourceDataLocations = new ArrayList<>();
    sourceDataLocations.add(new Path(inputSub2.toURI()));
    DistCpOptions options = new DistCpOptions(sourceDataLocations, new Path("dummy"));
    CircusTrainCopyListing.setRootPath(conf, new Path(input.toURI()));
    CircusTrainCopyListing copyListing = new CircusTrainCopyListing(conf, null);
    copyListing.doBuildListing(pathToListFile, options);
    try (Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(pathToListFile))) {
        Text key = new Text();
        CopyListingFileStatus value = new CopyListingFileStatus();
        assertTrue(reader.next(key, value));
        assertThat(key.toString(), is("/sub1/sub2"));
        assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2"));
        assertTrue(reader.next(key, value));
        assertThat(key.toString(), is("/sub1/sub2/data"));
        assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2/data"));
        assertFalse(reader.next(key, value));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DistCpOptions(org.apache.hadoop.tools.DistCpOptions) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) ArrayList(java.util.ArrayList) Reader(org.apache.hadoop.io.SequenceFile.Reader) Text(org.apache.hadoop.io.Text) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) Test(org.junit.Test)
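
A minimal sketch that keeps the same assumption that the listing is a SequenceFile of Text keys and CopyListingFileStatus values, but collects every entry into a map so assertions need not depend on record order; the class and method names are hypothetical.

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.tools.CopyListingFileStatus;

public class CopyListingDump {

    // Reads a DistCp copy listing and returns relative path -> source path entries.
    static Map<String, Path> readListing(Configuration conf, Path listFile) throws IOException {
        Map<String, Path> entries = new LinkedHashMap<>();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(listFile))) {
            Text relativePath = new Text();
            CopyListingFileStatus status = new CopyListingFileStatus();
            while (reader.next(relativePath, status)) {
                // toString() copies the Text, which next() overwrites on the following iteration
                entries.put(relativePath.toString(), status.getPath());
            }
        }
        return entries;
    }
}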

Example 15 with Reader

Use of org.apache.hadoop.io.SequenceFile.Reader in project nutch by apache.

Class LinkReader, method slice:

@Override
public List slice(String path, int start, int end) throws FileNotFoundException {
    List<HashMap> rows = new ArrayList<>();
    Path file = new Path(path);
    SequenceFile.Reader reader;
    try {
        reader = new SequenceFile.Reader(conf, Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        LinkDatum value = new LinkDatum();
        int i = 0;
        // skip records until the start position is reached
        for (; i < start && reader.next(key, value); i++) {
        }
        while (reader.next(key, value) && i < end) {
            HashMap<String, String> t_row = getLinksRow(key, value);
            rows.add(t_row);
            i++;
        }
        reader.close();
    } catch (FileNotFoundException fne) {
        throw new FileNotFoundException();
    } catch (IOException e) {
        e.printStackTrace();
        LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
        throw new WebApplicationException();
    }
    return rows;
}
Also used : Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.io.SequenceFile.Reader) WebApplicationException(javax.ws.rs.WebApplicationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Writable(org.apache.hadoop.io.Writable) LinkDatum(org.apache.nutch.scoring.webgraph.LinkDatum) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile)
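
A minimal generic sketch of the same slicing pattern, with hypothetical class and method names: it checks the upper bound before calling next() so the record just past the slice is never read, and closes the reader with try-with-resources.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileSlicer {

    // Returns string renderings of the records in the half-open range [start, end).
    static List<String> slice(Configuration conf, Path file, int start, int end) throws IOException {
        List<String> rows = new ArrayList<>();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(file))) {
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            int i = 0;
            // skip records before the slice
            while (i < start && reader.next(key, value)) {
                i++;
            }
            // checking the bound first avoids reading the record just past the slice
            while (i < end && reader.next(key, value)) {
                rows.add(key + "\t" + value);
                i++;
            }
        }
        return rows;
    }
}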

Aggregations

Reader (org.apache.hadoop.io.SequenceFile.Reader): 26
Path (org.apache.hadoop.fs.Path): 20
SequenceFile (org.apache.hadoop.io.SequenceFile): 18
IOException (java.io.IOException): 14
Writable (org.apache.hadoop.io.Writable): 14
FileNotFoundException (java.io.FileNotFoundException): 12
WebApplicationException (javax.ws.rs.WebApplicationException): 12
ArrayList (java.util.ArrayList): 10
HashMap (java.util.HashMap): 6
Text (org.apache.hadoop.io.Text): 6
Node (org.apache.nutch.scoring.webgraph.Node): 4
Test (org.junit.Test): 4
List (java.util.List): 3
LinkDatum (org.apache.nutch.scoring.webgraph.LinkDatum): 3
HashSet (java.util.HashSet): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
FileSystem (org.apache.hadoop.fs.FileSystem): 2
Writer (org.apache.hadoop.io.SequenceFile.Writer): 2
FlowFile (org.apache.nifi.flowfile.FlowFile): 2
ProcessException (org.apache.nifi.processor.exception.ProcessException): 2