Search in sources :

Example 1 with ProtobufWritable

use of com.twitter.elephantbird.mapreduce.io.ProtobufWritable in project elephant-bird by twitter.

In class TestRCFileProtobufStorage, method testRCFileStorage.

/**
 * Stores Person records into the {@code rcfile_in} directory three different ways and
 * checks that {@link RCFileProtobufPigLoader} reads every one of them back as the same rows.
 *
 * <ol>
 *   <li>through a Pig script that converts the input lines with {@code B64ToTuple} and stores
 *       them with {@code RCFileProtobufPigStorage};</li>
 *   <li>directly as {@code Person} messages, bypassing the tuple conversion;</li>
 *   <li>directly as {@code PersonWithoutEmail} messages rebuilt from the serialized
 *       {@code Person} bytes.</li>
 * </ol>
 *
 * The test directory is removed only when every assertion passes.
 */
@Test
public void testRCFileStorage() throws Exception {
    final String personType = Person.class.getName();

    // 1) Pig path: build the whole script once, then register it statement by statement.
    String storeScript = String.format(
        "DEFINE b64ToTuple %s('%s');\n"
            + "A = load '%s' as (line);\n"
            + "A = foreach A generate b64ToTuple(line) as t;\n"
            + "A = foreach A generate FLATTEN(t);\n"
            + "STORE A into '%s' using %s('%s');\n",
        B64ToTuple.class.getName(),
        personType,
        inputDir.toURI().toString(),
        rcfile_in.toURI().toString(),
        RCFileProtobufPigStorage.class.getName(),
        personType);
    for (String statement : storeScript.split("\n")) {
        pigServer.registerQuery(statement + "\n");
    }

    // 2) Direct path: write Person objects without converting them to a tuple,
    //    so that optional fields that are not set are null in the RCFile.
    ProtobufWritable<Person> personHolder = ProtobufWritable.newInstance(Person.class);
    File unsetFieldsFile = new File(rcfile_in, "persons_with_unset_fields.rc");
    RecordWriter<Writable, Writable> personWriter = createProtoWriter(Person.class, unsetFieldsFile);
    for (Person person : records) {
        personHolder.set(person);
        personWriter.write(null, personHolder);
    }
    personWriter.close(null);

    // 3) Unknown-fields path: re-parse each Person's bytes as PersonWithoutEmail
    //    and write those messages to a second RCFile.
    ProtobufWritable<PersonWithoutEmail> noEmailHolder = ProtobufWritable.newInstance(PersonWithoutEmail.class);
    File unknownFieldsFile = new File(rcfile_in, "persons_with_unknows.rc");
    RecordWriter<Writable, Writable> noEmailWriter = createProtoWriter(PersonWithoutEmail.class, unknownFieldsFile);
    for (Person person : records) {
        PersonWithoutEmail withoutEmail = PersonWithoutEmail.newBuilder().mergeFrom(person.toByteArray()).build();
        noEmailHolder.set(withoutEmail);
        noEmailWriter.write(null, noEmailHolder);
    }
    noEmailWriter.close(null);

    // Load everything that now sits in the directory with the Person schema.
    String loadStatement = String.format(
        "A = load '%s' using %s('%s');\n",
        rcfile_in.toURI().toString(),
        RCFileProtobufPigLoader.class.getName(),
        personType);
    pigServer.registerQuery(loadStatement);

    // The directory received the record list three times (Pig store plus the two
    // direct writes), so the loader is expected to yield the list three times over.
    final int expectedCopies = 3;
    Iterator<Tuple> rows = pigServer.openIterator("A");
    for (int copy = 0; copy < expectedCopies; copy++) {
        for (Person person : records) {
            Assert.assertEquals(personToString(person), rows.next().toString());
        }
    }

    // Clean up on a successful run only; a failed run leaves the files for inspection.
    FileUtil.fullyDelete(new File(testDir));
}
Also used : Writable(org.apache.hadoop.io.Writable) ProtobufWritable(com.twitter.elephantbird.mapreduce.io.ProtobufWritable) PersonWithoutEmail(com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) File(java.io.File) ProtobufBytesToTuple(com.twitter.elephantbird.pig.piggybank.ProtobufBytesToTuple) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 2 with ProtobufWritable

use of com.twitter.elephantbird.mapreduce.io.ProtobufWritable in project elephant-bird by twitter.

In class TestProtobufMultiFormatLoader, method setUp.

/**
 * Prepares the input directory with the same Person records in two LZO files:
 * {@code 1-block.lzo} in protobuf block format and {@code 2-b64.lzo} as base64 lines.
 *
 * The whole fixture is skipped through {@code Assume} when
 * {@code CoreTestUtil.okToRunLzoTests} reports that LZO tests cannot run.
 */
@Before
public void setUp() throws Exception {
    final Configuration hadoopConf = new Configuration();
    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(hadoopConf));

    pigServer = PigTestUtil.makePigServer();
    inputDir.mkdirs();

    // First file: protobuf block format.
    ProtobufBlockWriter<Person> blockWriter =
        new ProtobufBlockWriter<Person>(createLzoOut("1-block.lzo", hadoopConf), Person.class);
    for (Person person : records) {
        blockWriter.write(person);
    }
    blockWriter.close();

    // Second file: one base64 line per record; a single writable is reused for every record.
    ProtobufWritable<Person> reusableWritable = ProtobufWritable.newInstance(Person.class);
    LzoBinaryB64LineRecordWriter<Person, ProtobufWritable<Person>> lineWriter =
        LzoBinaryB64LineRecordWriter.newProtobufWriter(Person.class, createLzoOut("2-b64.lzo", hadoopConf));
    for (Person person : records) {
        reusableWritable.set(person);
        lineWriter.write(null, reusableWritable);
    }
    lineWriter.close(null);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ProtobufBlockWriter(com.twitter.elephantbird.mapreduce.io.ProtobufBlockWriter) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) ProtobufWritable(com.twitter.elephantbird.mapreduce.io.ProtobufWritable) Before(org.junit.Before)

Example 3 with ProtobufWritable

use of com.twitter.elephantbird.mapreduce.io.ProtobufWritable in project elephant-bird by twitter.

In class TestProtobufWritable, method testReadWrite.

/**
 * Round-trips {@code referenceAbWritable} through the file {@code test.txt} (relative to the
 * working directory) and checks that the writable read back carries an AddressBook equal to
 * {@code referenceAb} and has the same hash code as the original writable.
 *
 * Both streams are closed in {@code finally} blocks so that a failing write or read does not
 * leak the underlying file handle.
 *
 * @throws IOException if the file cannot be opened, written, read or closed
 */
@Test
public void testReadWrite() throws IOException {
    // Serialize the reference writable to disk.
    DataOutputStream dos = new DataOutputStream(new FileOutputStream("test.txt"));
    try {
        referenceAbWritable.write(dos);
    } finally {
        dos.close();
    }

    // Build the target writable before opening the input stream, so a failure here
    // cannot leave an open stream behind.
    ProtobufWritable<AddressBook> after = new ProtobufWritable<AddressBook>(new TypeRef<AddressBook>() {
    });

    // Deserialize from the file that was just written.
    DataInputStream dis = new DataInputStream(new FileInputStream("test.txt"));
    try {
        after.readFields(dis);
    } finally {
        dis.close();
    }

    AddressBook ab2 = after.get();
    assertEquals(referenceAb, ab2);
    assertEquals(referenceAbWritable.hashCode(), after.hashCode());
}
Also used : AddressBook(com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook) DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) ProtobufWritable(com.twitter.elephantbird.mapreduce.io.ProtobufWritable) Test(org.junit.Test)

Aggregations

ProtobufWritable (com.twitter.elephantbird.mapreduce.io.ProtobufWritable)3 Person (com.twitter.data.proto.tutorial.AddressBookProtos.Person)2 Test (org.junit.Test)2 AddressBook (com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook)1 PersonWithoutEmail (com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail)1 ProtobufBlockWriter (com.twitter.elephantbird.mapreduce.io.ProtobufBlockWriter)1 ProtobufBytesToTuple (com.twitter.elephantbird.pig.piggybank.ProtobufBytesToTuple)1 DataInputStream (java.io.DataInputStream)1 DataOutputStream (java.io.DataOutputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileOutputStream (java.io.FileOutputStream)1 Configuration (org.apache.hadoop.conf.Configuration)1 Writable (org.apache.hadoop.io.Writable)1 Tuple (org.apache.pig.data.Tuple)1 Before (org.junit.Before)1