Search in sources :

Example 1 with PersonWithoutEmail

Usage of com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail in the elephant-bird project by Twitter.

From the class TestRCFileProtobufStorage, method testRCFileStorage.

/**
 * Round-trips {@link Person} records through RCFile storage and verifies that
 * {@link RCFileProtobufPigLoader} reads every one of them back.
 *
 * <p>Three sets of the same {@code records} are placed under {@code rcfile_in}:
 * <ol>
 *   <li>one stored by a Pig script using {@link RCFileProtobufPigStorage};</li>
 *   <li>one written directly as {@link Person} messages, so unset optional
 *       fields are stored as null in the RCFile;</li>
 *   <li>one written as {@link PersonWithoutEmail} messages built from the
 *       serialized {@link Person} bytes, to exercise unknown-field handling.</li>
 * </ol>
 * All files are then loaded as {@link Person} and each resulting tuple is
 * compared with {@code personToString(person)}.
 *
 * @throws Exception if the Pig script, the RCFile writers, or the load fails
 */
@Test
public void testRCFileStorage() throws Exception {
    // Number of copies of `records` expected under rcfile_in: one stored by
    // Pig plus the two files written directly below.
    final int expectedRecordSets = 3;

    // write to rcFile using RCFileProtobufStorage
    String storeScript = String.format("DEFINE b64ToTuple %s('%s');\n" + "A = load '%s' as (line);\n" + "A = foreach A generate b64ToTuple(line) as t;\n" + "A = foreach A generate FLATTEN(t);\n" + "STORE A into '%s' using %s('%s');\n", B64ToTuple.class.getName(), Person.class.getName(), inputDir.toURI().toString(), rcfile_in.toURI().toString(), RCFileProtobufPigStorage.class.getName(), Person.class.getName());
    for (String line : storeScript.split("\n")) {
        pigServer.registerQuery(line + "\n");
    }

    // create an rcfile with Person objects directly, without converting to a
    // tuple, so that optional fields that are not set are null in RCFile
    ProtobufWritable<Person> personWritable = ProtobufWritable.newInstance(Person.class);
    RecordWriter<Writable, Writable> personWriter = createProtoWriter(Person.class, new File(rcfile_in, "persons_with_unset_fields.rc"));
    try {
        for (Person person : records) {
            personWritable.set(person);
            personWriter.write(null, personWritable);
        }
    } finally {
        // close even when a write fails so the writer is not leaked
        personWriter.close(null);
    }

    // create an rcFile with PersonWithoutEmail to test unknown fields
    ProtobufWritable<PersonWithoutEmail> pweWritable = ProtobufWritable.newInstance(PersonWithoutEmail.class);
    RecordWriter<Writable, Writable> pweWriter = createProtoWriter(PersonWithoutEmail.class, new File(rcfile_in, "persons_with_unknows.rc"));
    try {
        for (Person person : records) {
            pweWritable.set(PersonWithoutEmail.newBuilder().mergeFrom(person.toByteArray()).build());
            pweWriter.write(null, pweWritable);
        }
    } finally {
        pweWriter.close(null);
    }

    // load all the files
    pigServer.registerQuery(String.format("A = load '%s' using %s('%s');\n", rcfile_in.toURI().toString(), RCFileProtobufPigLoader.class.getName(), Person.class.getName()));

    // verify the result: every record set must come back, in order, and
    // nothing else may follow
    Iterator<Tuple> rows = pigServer.openIterator("A");
    for (int set = 0; set < expectedRecordSets; set++) {
        for (Person person : records) {
            Assert.assertTrue("loader returned fewer rows than expected (record set " + set + ")", rows.hasNext());
            String expected = personToString(person);
            Assert.assertEquals(expected, rows.next().toString());
        }
    }
    Assert.assertFalse("loader returned more rows than expected", rows.hasNext());

    // clean up on successful run
    FileUtil.fullyDelete(new File(testDir));
}
Also used : Writable(org.apache.hadoop.io.Writable) ProtobufWritable(com.twitter.elephantbird.mapreduce.io.ProtobufWritable) PersonWithoutEmail(com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) File(java.io.File) ProtobufBytesToTuple(com.twitter.elephantbird.pig.piggybank.ProtobufBytesToTuple) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Aggregations

Person (com.twitter.data.proto.tutorial.AddressBookProtos.Person)1 PersonWithoutEmail (com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail)1 ProtobufWritable (com.twitter.elephantbird.mapreduce.io.ProtobufWritable)1 ProtobufBytesToTuple (com.twitter.elephantbird.pig.piggybank.ProtobufBytesToTuple)1 File (java.io.File)1 Writable (org.apache.hadoop.io.Writable)1 Tuple (org.apache.pig.data.Tuple)1 Test (org.junit.Test)1