Search in sources:

Example 56 with NullWritable

use of org.apache.hadoop.io.NullWritable in project mongo-hadoop by mongodb.

the class BSONWritable method toBSON.

/**
 * Unwrap a (usually Writable) Object, getting back a value suitable for
 * putting into a BSONObject. If the given object is not Writable, then
 * simply return the Object back.
 *
 * <p>Conversions performed:
 * <ul>
 *   <li>{@code null} and {@code NullWritable} become {@code null};</li>
 *   <li>{@code Text} becomes its {@code toString()} value;</li>
 *   <li>{@code BSONWritable} becomes the document returned by {@code getDoc()};</li>
 *   <li>an {@code AbstractMapWritable} that implements {@code java.util.Map}
 *       becomes a {@code BasicBSONObject} keyed by each key's
 *       {@code toString()}, with values converted recursively;</li>
 *   <li>{@code ArrayWritable} becomes an {@code Object[]} whose elements are
 *       converted recursively;</li>
 *   <li>{@code BytesWritable} becomes a fresh {@code byte[]} holding exactly
 *       the first {@code getLength()} bytes of its buffer;</li>
 *   <li>Boolean/Byte/Double/Float/Long/Int writables become the value
 *       returned by their {@code get()} method.</li>
 * </ul>
 * Any other Writable is returned unchanged.
 *
 * @param x the Object to turn into BSON.
 * @return the BSON representation of the Object.
 * @throws IllegalArgumentException if {@code x} is an
 *         {@code AbstractMapWritable} that does not implement
 *         {@code java.util.Map}.
 */
@SuppressWarnings("unchecked")
public static Object toBSON(final Object x) {
    if (x == null) {
        return null;
    }
    if (x instanceof Text) {
        return x.toString();
    }
    if (x instanceof BSONWritable) {
        return ((BSONWritable) x).getDoc();
    }
    if (x instanceof Writable) {
        if (x instanceof AbstractMapWritable) {
            if (!(x instanceof Map)) {
                throw new IllegalArgumentException(String.format("Cannot turn %s into BSON, since it does " + "not implement java.util.Map.", x.getClass().getName()));
            }
            // Unchecked cast: the entries are only read, keys via toString()
            // and values via the recursive toBSON(Object) call.
            Map<Writable, Writable> map = (Map<Writable, Writable>) x;
            BasicBSONObject bson = new BasicBSONObject();
            for (Map.Entry<Writable, Writable> entry : map.entrySet()) {
                bson.put(entry.getKey().toString(), toBSON(entry.getValue()));
            }
            return bson;
        }
        if (x instanceof ArrayWritable) {
            Writable[] o = ((ArrayWritable) x).get();
            Object[] a = new Object[o.length];
            for (int i = 0; i < o.length; i++) {
                a[i] = toBSON(o[i]);
            }
            return a;
        }
        if (x instanceof NullWritable) {
            return null;
        }
        if (x instanceof BooleanWritable) {
            return ((BooleanWritable) x).get();
        }
        if (x instanceof BytesWritable) {
            // getBytes() exposes the backing buffer, of which only the first
            // getLength() bytes are valid data. Copy just that prefix so that
            // padding or stale bytes never end up in the BSON value.
            BytesWritable bytes = (BytesWritable) x;
            byte[] valid = new byte[bytes.getLength()];
            System.arraycopy(bytes.getBytes(), 0, valid, 0, valid.length);
            return valid;
        }
        if (x instanceof ByteWritable) {
            return ((ByteWritable) x).get();
        }
        if (x instanceof DoubleWritable) {
            return ((DoubleWritable) x).get();
        }
        if (x instanceof FloatWritable) {
            return ((FloatWritable) x).get();
        }
        if (x instanceof LongWritable) {
            return ((LongWritable) x).get();
        }
        if (x instanceof IntWritable) {
            return ((IntWritable) x).get();
        }
    // TODO - Support counters
    }
    // Not a Writable this method knows how to unwrap: hand it back untouched.
    return x;
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) AbstractMapWritable(org.apache.hadoop.io.AbstractMapWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) NullWritable(org.apache.hadoop.io.NullWritable) BasicBSONObject(org.bson.BasicBSONObject) FloatWritable(org.apache.hadoop.io.FloatWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) AbstractMapWritable(org.apache.hadoop.io.AbstractMapWritable) BasicBSONObject(org.bson.BasicBSONObject) BasicDBObject(com.mongodb.BasicDBObject) BSONObject(org.bson.BSONObject) LongWritable(org.apache.hadoop.io.LongWritable) Map(java.util.Map) ByteWritable(org.apache.hadoop.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable)

Example 57 with NullWritable

use of org.apache.hadoop.io.NullWritable in project mongo-hadoop by mongodb.

the class BSONFileInputFormatTest method enronEmails.

/**
 * Reads every split of the Enron mail BSON dump through
 * {@code BSONFileInputFormat} and checks the total number of records.
 *
 * @throws IOException if computing the splits, reading a record, or closing
 *         a reader fails.
 */
@Test
public void enronEmails() throws IOException {
    BSONFileInputFormat inputFormat = new BSONFileInputFormat();
    JobConf job = new JobConf();
    String inputDirectory = new File(EXAMPLE_DATA_HOME, "/dump/enron_mail/messages.bson").getAbsoluteFile().toURI().toString();
    // Hadoop 2.X
    job.set("mapreduce.input.fileinputformat.inputdir", inputDirectory);
    // Hadoop 1.2.X
    job.set("mapred.input.dir", inputDirectory);
    FileSplit[] splits = inputFormat.getSplits(job, 5);
    int count = 0;
    BSONWritable writable = new BSONWritable();
    for (FileSplit split : splits) {
        RecordReader<NullWritable, BSONWritable> recordReader = inputFormat.getRecordReader(split, job, null);
        try {
            while (recordReader.next(null, writable)) {
                count++;
            }
        } finally {
            // Release the reader for this split even if reading fails,
            // so that no reader is left open between splits.
            recordReader.close();
        }
    }
    assertEquals("There are 501513 messages in the enron corpus", 501513, count);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) FileSplit(org.apache.hadoop.mapred.FileSplit) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Example 58 with NullWritable

use of org.apache.hadoop.io.NullWritable in project mongo-hadoop by mongodb.

the class BSONFileInputFormatTest method enronEmails.

/**
 * Reads every split of the Enron mail BSON dump through
 * {@code BSONFileInputFormat} and checks the total number of records.
 *
 * @throws IOException if computing the splits, reading a record, or closing
 *         a reader fails.
 */
@Test
public void enronEmails() throws IOException {
    BSONFileInputFormat inputFormat = new BSONFileInputFormat();
    JobConf job = new JobConf();
    String inputDirectory = new File(EXAMPLE_DATA_HOME, "/dump/enron_mail/messages.bson").getAbsoluteFile().toURI().toString();
    // Hadoop 2.X
    job.set("mapreduce.input.fileinputformat.inputdir", inputDirectory);
    // Hadoop 1.2.X
    job.set("mapred.input.dir", inputDirectory);
    FileSplit[] splits = inputFormat.getSplits(job, 5);
    int count = 0;
    BSONWritable writable = new BSONWritable();
    for (FileSplit split : splits) {
        RecordReader<NullWritable, BSONWritable> recordReader = inputFormat.getRecordReader(split, job, null);
        try {
            while (recordReader.next(null, writable)) {
                count++;
            }
        } finally {
            // Release the reader for this split even if reading fails,
            // so that no reader is left open between splits.
            recordReader.close();
        }
    }
    assertEquals("There are 501513 messages in the enron corpus", 501513, count);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BSONFileInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat) FileSplit(org.apache.hadoop.mapred.FileSplit) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Example 59 with NullWritable

use of org.apache.hadoop.io.NullWritable in project camel by apache.

the class HdfsConsumerTest method testReadDouble.

/**
 * Writes one NullWritable/DoubleWritable record to a local sequence file,
 * then registers an hdfs: consumer route on that file and expects exactly
 * one message to arrive at mock:result. Does nothing when canTest() is false.
 */
@Test
public void testReadDouble() throws Exception {
    if (!canTest()) {
        return;
    }

    // Prepare the sequence file the consumer will read.
    final Path file = new Path(new File("target/test/test-camel-double").getAbsolutePath());
    final Configuration hadoopConf = new Configuration();
    final FileSystem localFs = FileSystem.get(file.toUri(), hadoopConf);
    final SequenceFile.Writer out = createWriter(localFs, hadoopConf, file, NullWritable.class, DoubleWritable.class);
    final NullWritable key = NullWritable.get();
    final DoubleWritable payload = new DoubleWritable();
    payload.set(3.1415926535);
    out.append(key, payload);
    out.sync();
    out.close();

    // Exactly one record was written, so exactly one exchange is expected.
    final MockEndpoint mock = context.getEndpoint("mock:result", MockEndpoint.class);
    mock.expectedMessageCount(1);

    final String consumerUri = "hdfs:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0";
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from(consumerUri).to("mock:result");
        }
    });
    context.start();
    mock.assertIsSatisfied();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) RouteBuilder(org.apache.camel.builder.RouteBuilder) MockEndpoint(org.apache.camel.component.mock.MockEndpoint) FileSystem(org.apache.hadoop.fs.FileSystem) DoubleWritable(org.apache.hadoop.io.DoubleWritable) ArrayFile(org.apache.hadoop.io.ArrayFile) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Example 60 with NullWritable

use of org.apache.hadoop.io.NullWritable in project camel by apache.

the class HdfsConsumerTest method testReadLong.

/**
 * Writes one NullWritable/LongWritable record to a local sequence file,
 * then registers an hdfs: consumer route on that file and expects exactly
 * one message to arrive at mock:result. Does nothing when canTest() is false.
 */
@Test
public void testReadLong() throws Exception {
    if (!canTest()) {
        return;
    }

    // Prepare the sequence file the consumer will read.
    final Path file = new Path(new File("target/test/test-camel-long").getAbsolutePath());
    final Configuration hadoopConf = new Configuration();
    final FileSystem localFs = FileSystem.get(file.toUri(), hadoopConf);
    final SequenceFile.Writer out = createWriter(localFs, hadoopConf, file, NullWritable.class, LongWritable.class);
    final NullWritable key = NullWritable.get();
    final LongWritable payload = new LongWritable();
    payload.set(31415926535L);
    out.append(key, payload);
    out.sync();
    out.close();

    // Exactly one record was written, so exactly one exchange is expected.
    final MockEndpoint mock = context.getEndpoint("mock:result", MockEndpoint.class);
    mock.expectedMessageCount(1);

    final String consumerUri = "hdfs:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0";
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from(consumerUri).to("mock:result");
        }
    });
    context.start();
    mock.assertIsSatisfied();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) RouteBuilder(org.apache.camel.builder.RouteBuilder) MockEndpoint(org.apache.camel.component.mock.MockEndpoint) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) ArrayFile(org.apache.hadoop.io.ArrayFile) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable): 101, Test (org.junit.Test): 65, Configuration (org.apache.hadoop.conf.Configuration): 41, Path (org.apache.hadoop.fs.Path): 41, File (java.io.File): 29, FileSystem (org.apache.hadoop.fs.FileSystem): 26, SequenceFile (org.apache.hadoop.io.SequenceFile): 22, JobConf (org.apache.hadoop.mapred.JobConf): 22, RouteBuilder (org.apache.camel.builder.RouteBuilder): 18, MockEndpoint (org.apache.camel.component.mock.MockEndpoint): 18, ArrayFile (org.apache.hadoop.io.ArrayFile): 18, Text (org.apache.hadoop.io.Text): 16, InputSplit (org.apache.hadoop.mapred.InputSplit): 16, LongWritable (org.apache.hadoop.io.LongWritable): 15, IntWritable (org.apache.hadoop.io.IntWritable): 10, Writer (org.apache.hadoop.io.SequenceFile.Writer): 9, CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 8, IOException (java.io.IOException): 7, StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 7, FloatWritable (org.apache.hadoop.io.FloatWritable): 7