
Example 66 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project camel by apache, in the class HdfsConsumerTest, method testReadBytes.

@Test
public void testReadBytes() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, BytesWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BytesWritable valueWritable = new BytesWritable();
    String value = "CIAO!";
    byte[] bytes = value.getBytes();
    valueWritable.set(bytes, 0, bytes.length);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from("hdfs2:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    resultEndpoint.assertIsSatisfied();
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), SequenceFile (org.apache.hadoop.io.SequenceFile), RouteBuilder (org.apache.camel.builder.RouteBuilder), MockEndpoint (org.apache.camel.component.mock.MockEndpoint), BytesWritable (org.apache.hadoop.io.BytesWritable), ArrayFile (org.apache.hadoop.io.ArrayFile), File (java.io.File), NullWritable (org.apache.hadoop.io.NullWritable), Writer (org.apache.hadoop.io.SequenceFile.Writer), Test (org.junit.Test)
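
These tests call a createWriter helper that is not shown in the excerpt; a minimal sketch of what such a helper might look like, assuming the Hadoop 2 option-based SequenceFile.createWriter API:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;

// Hypothetical helper: opens a SequenceFile.Writer for the given key/value classes.
private static SequenceFile.Writer createWriter(Configuration conf, Path file,
        Class<? extends Writable> keyClass, Class<? extends Writable> valueClass) throws IOException {
    return SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass));
}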

Example 67 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project camel by apache, in the class HdfsConsumerTest, method testReadWithReadSuffix.

@Test
public void testReadWithReadSuffix() throws Exception {
    if (!canTest()) {
        return;
    }
    String[] beforeFiles = new File("target/test").list();
    int before = beforeFiles != null ? beforeFiles.length : 0;
    final Path file = new Path(new File("target/test/test-camel-boolean").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, BooleanWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BooleanWritable valueWritable = new BooleanWritable();
    valueWritable.set(true);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from("hdfs2:localhost/" + file.getParent().toUri() + "?scheduler=#myScheduler&pattern=*&fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0&readSuffix=handled").to("mock:result");
        }
    });
    ScheduledExecutorService pool = context.getExecutorServiceManager().newScheduledThreadPool(null, "unitTestPool", 1);
    DefaultScheduledPollConsumerScheduler scheduler = new DefaultScheduledPollConsumerScheduler(pool);
    ((JndiRegistry) ((PropertyPlaceholderDelegateRegistry) context.getRegistry()).getRegistry()).bind("myScheduler", scheduler);
    context.start();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.assertIsSatisfied();
    // synchronize on pool that was used to run hdfs consumer thread
    scheduler.getScheduledExecutorService().shutdown();
    scheduler.getScheduledExecutorService().awaitTermination(5000, TimeUnit.MILLISECONDS);
    Set<String> files = new HashSet<String>(Arrays.asList(new File("target/test").list()));
    // there may be some leftover files before, so test that we only added 2 new files
    assertThat(files.size() - before, equalTo(2));
    assertTrue(files.remove("test-camel-boolean.handled"));
    assertTrue(files.remove(".test-camel-boolean.handled.crc"));
}
Also used: Path (org.apache.hadoop.fs.Path), ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService), Configuration (org.apache.hadoop.conf.Configuration), RouteBuilder (org.apache.camel.builder.RouteBuilder), MockEndpoint (org.apache.camel.component.mock.MockEndpoint), NullWritable (org.apache.hadoop.io.NullWritable), JndiRegistry (org.apache.camel.impl.JndiRegistry), SequenceFile (org.apache.hadoop.io.SequenceFile), BooleanWritable (org.apache.hadoop.io.BooleanWritable), ArrayFile (org.apache.hadoop.io.ArrayFile), File (java.io.File), Writer (org.apache.hadoop.io.SequenceFile.Writer), DefaultScheduledPollConsumerScheduler (org.apache.camel.impl.DefaultScheduledPollConsumerScheduler), HashSet (java.util.HashSet), Test (org.junit.Test)
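
The readSuffix option makes the consumer rename each file after it has been consumed, which is why the test expects test-camel-boolean.handled plus the .crc checksum file the local filesystem writes alongside it. A minimal sketch of reading the renamed file back, assuming the Hadoop 2 option-based SequenceFile.Reader API:

// Re-open the renamed sequence file and read the single key/value pair back.
Configuration conf = new Configuration();
Path handled = new Path(new File("target/test/test-camel-boolean.handled").getAbsolutePath());
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(handled));
try {
    NullWritable key = NullWritable.get();
    BooleanWritable value = new BooleanWritable();
    while (reader.next(key, value)) {
        System.out.println("value = " + value.get());   // prints: value = true
    }
} finally {
    reader.close();
}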

Example 68 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project camel by apache, in the class HdfsConsumerTest, method testReadDouble.

@Test
public void testReadDouble() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-double").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, DoubleWritable.class);
    NullWritable keyWritable = NullWritable.get();
    DoubleWritable valueWritable = new DoubleWritable();
    double value = 3.1415926535;
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from("hdfs2:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    resultEndpoint.assertIsSatisfied();
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), SequenceFile (org.apache.hadoop.io.SequenceFile), RouteBuilder (org.apache.camel.builder.RouteBuilder), MockEndpoint (org.apache.camel.component.mock.MockEndpoint), DoubleWritable (org.apache.hadoop.io.DoubleWritable), ArrayFile (org.apache.hadoop.io.ArrayFile), File (java.io.File), NullWritable (org.apache.hadoop.io.NullWritable), Writer (org.apache.hadoop.io.SequenceFile.Writer), Test (org.junit.Test)
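
Beyond the message count, the received body could also be checked; a sketch using MockEndpoint's received exchanges, assuming JUnit's assertEquals is in scope and that the component delivers the DoubleWritable value as a body convertible to Double:

// After assertIsSatisfied(): inspect the body of the single received exchange.
Double body = resultEndpoint.getReceivedExchanges().get(0).getIn().getBody(Double.class);
assertEquals(3.1415926535, body.doubleValue(), 0.0);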

Example 69 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache, in the class TestFileOutputCommitter, method writeOutput.

private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context) throws IOException, InterruptedException {
    NullWritable nullWritable = NullWritable.get();
    try {
        // exercise every combination of real, null, and NullWritable keys and values
        theRecordWriter.write(key1, val1);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val1);
        theRecordWriter.write(nullWritable, val2);
        theRecordWriter.write(key2, nullWritable);
        theRecordWriter.write(key1, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key2, val2);
    } finally {
        theRecordWriter.close(null);
    }
}
Also used: NullWritable (org.apache.hadoop.io.NullWritable)
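
key1, key2, val1, and val2 are fields defined elsewhere in the test class; plausible declarations (hypothetical here, the excerpt does not show them) are sketched below, together with the behavior this method exercises: a text record writer such as TextOutputFormat's LineRecordWriter skips a key or value that is null or a NullWritable instead of writing it.

// Hypothetical field declarations matching the writes above.
private Text key1 = new Text("key1");
private Text key2 = new Text("key2");
private Text val1 = new Text("val1");
private Text val2 = new Text("val2");

// With such a writer the eight writes would produce (tab-separated):
//   key1<TAB>val1    both present
//   (nothing)        null key, NullWritable value
//   val1             null key: value only
//   val2             NullWritable key: value only
//   key2             NullWritable value: key only
//   key1             null value: key only
//   (nothing)        null key and value
//   key2<TAB>val2    both present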

Example 70 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache, in the class TestRehashPartitioner, method testPatterns.

/** test partitioner for patterns */
@Test
public void testPatterns() {
    int[] results = new int[PARTITIONS];
    RehashPartitioner<IntWritable, NullWritable> p = new RehashPartitioner<IntWritable, NullWritable>();
    /* test sequence 4, 8, 12, ... 128 */
    for (int i = 0; i < END; i += STEP) {
        results[p.getPartition(new IntWritable(i), null, PARTITIONS)]++;
    }
    int badbuckets = 0;
    Integer min = Collections.min(Arrays.asList(ArrayUtils.toObject(results)));
    Integer max = Collections.max(Arrays.asList(ArrayUtils.toObject(results)));
    Integer avg = (int) Math.round((max + min) / 2.0);
    System.out.println("Dumping buckets distribution: min=" + min + " avg=" + avg + " max=" + max);
    for (int i = 0; i < PARTITIONS; i++) {
        double var = (results[i] - avg) / (double) (avg);
        System.out.println("bucket " + i + " " + results[i] + " items, variance " + var);
        if (Math.abs(var) > MAX_ERROR)
            badbuckets++;
    }
    System.out.println(badbuckets + " of " + PARTITIONS + " are too small or large buckets");
    assertTrue("too many overflow buckets", badbuckets < PARTITIONS * MAX_BADBUCKETS);
}
Also used: NullWritable (org.apache.hadoop.io.NullWritable), IntWritable (org.apache.hadoop.io.IntWritable)
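
RehashPartitioner exists because plain hashCode-modulo partitioning degenerates on patterned key sequences like the arithmetic progression this test generates. A self-contained illustration of the effect; the constants and the HashMap-style bit mix below are illustrative, not RehashPartitioner's actual implementation:

int partitions = 32;
int step = 4;
int[] naive = new int[partitions];
int[] rehashed = new int[partitions];
for (int i = 0; i < 128; i += step) {
    // IntWritable.hashCode() is the int value itself, so modulo keeps the pattern:
    naive[i % partitions]++;
    // A supplemental bit mix (HashMap-style) breaks the pattern up:
    int h = i;
    h ^= (h >>> 20) ^ (h >>> 12);
    h ^= (h >>> 7) ^ (h >>> 4);
    rehashed[(h & Integer.MAX_VALUE) % partitions]++;
}
// naive:    only 8 of 32 buckets are hit, 4 items each
// rehashed: 16 buckets are hit, 2 items each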

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable): 101
Test (org.junit.Test): 65
Configuration (org.apache.hadoop.conf.Configuration): 41
Path (org.apache.hadoop.fs.Path): 41
File (java.io.File): 29
FileSystem (org.apache.hadoop.fs.FileSystem): 26
SequenceFile (org.apache.hadoop.io.SequenceFile): 22
JobConf (org.apache.hadoop.mapred.JobConf): 22
RouteBuilder (org.apache.camel.builder.RouteBuilder): 18
MockEndpoint (org.apache.camel.component.mock.MockEndpoint): 18
ArrayFile (org.apache.hadoop.io.ArrayFile): 18
Text (org.apache.hadoop.io.Text): 16
InputSplit (org.apache.hadoop.mapred.InputSplit): 16
LongWritable (org.apache.hadoop.io.LongWritable): 15
IntWritable (org.apache.hadoop.io.IntWritable): 10
Writer (org.apache.hadoop.io.SequenceFile.Writer): 9
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 8
IOException (java.io.IOException): 7
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 7
FloatWritable (org.apache.hadoop.io.FloatWritable): 7