Search in sources :

Example 1 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class TestProtobufMultiFormatLoader method testMultiFormatLoader.

/**
 * Loads the input directory through {@link ProtobufPigLoader} and checks that the
 * returned tuples match the fixture records, in order, for two consecutive passes.
 * The input directory is deleted once all comparisons have passed.
 */
@Test
public void testMultiFormatLoader() throws Exception {
    // setUp might not have run because of missing lzo native libraries
    Assume.assumeTrue(pigServer != null);

    String inputUri = inputDir.toURI().toString();
    String loaderName = ProtobufPigLoader.class.getName();
    String protoName = Person.class.getName();
    String loadQuery = String.format("A = load '%s' using %s('%s');\n", inputUri, loaderName, protoName);
    pigServer.registerQuery(loadQuery);

    Iterator<Tuple> loaded = pigServer.openIterator("A");
    // verify: the full record set is expected to come back twice
    for (int pass = 0; pass < 2; pass++) {
        for (Person rec : records) {
            Assert.assertEquals(personToString(rec), loaded.next().toString());
        }
    }

    FileUtil.fullyDelete(inputDir);
}
Also used : Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 2 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class TestRCFileProtobufStorage method setUp.

/**
 * Resets the test directory, creates the Pig server, and writes every fixture
 * record to {@code persons_b64.txt} in the input directory, one base64-encoded
 * protobuf per line.
 *
 * @throws Exception if any setup step fails
 */
@Before
public void setUp() throws Exception {
    FileUtil.fullyDelete(new File(testDir));
    pigServer = PigTestUtil.makePigServer();
    inputDir.mkdirs();
    // create a text file with b64 encoded protobufs
    FileOutputStream out = new FileOutputStream(new File(inputDir, "persons_b64.txt"));
    try {
        for (Person rec : records) {
            out.write(base64.encode(rec.toByteArray()));
            out.write('\n');
        }
    } finally {
        // close even when a write fails so the file handle is not leaked
        out.close();
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) File(java.io.File) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) Before(org.junit.Before)

Example 3 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class TestRCFileProtobufStorage method testRCFileStorage.

/**
 * Writes the fixture records into the RCFile input directory three ways (via a Pig
 * STORE using {@link RCFileProtobufPigStorage}, directly as {@code Person} writables,
 * and directly as {@code PersonWithoutEmail} writables), then loads the directory
 * with {@link RCFileProtobufPigLoader} and checks that the record set comes back
 * three times in order.
 */
@Test
public void testRCFileStorage() throws Exception {
    String personClass = Person.class.getName();

    // write to rcFile using RCFileProtobufStorage, registering one statement at a time
    String storeScript = String.format(
        "DEFINE b64ToTuple %s('%s');\n"
            + "A = load '%s' as (line);\n"
            + "A = foreach A generate b64ToTuple(line) as t;\n"
            + "A = foreach A generate FLATTEN(t);\n"
            + "STORE A into '%s' using %s('%s');\n",
        B64ToTuple.class.getName(),
        personClass,
        inputDir.toURI().toString(),
        rcfile_in.toURI().toString(),
        RCFileProtobufPigStorage.class.getName(),
        personClass);
    for (String statement : storeScript.split("\n")) {
        pigServer.registerQuery(statement + "\n");
    }

    // create an rcfile with Person objects directly, without converting to a
    // tuple, so that optional fields that are not set are null in RCFile
    ProtobufWritable<Person> personWritable = ProtobufWritable.newInstance(Person.class);
    RecordWriter<Writable, Writable> personWriter =
        createProtoWriter(Person.class, new File(rcfile_in, "persons_with_unset_fields.rc"));
    for (Person rec : records) {
        personWritable.set(rec);
        personWriter.write(null, personWritable);
    }
    personWriter.close(null);

    // create an rcFile with PersonWithoutEmail to test unknown fields
    ProtobufWritable<PersonWithoutEmail> pweWritable = ProtobufWritable.newInstance(PersonWithoutEmail.class);
    RecordWriter<Writable, Writable> pweWriter =
        createProtoWriter(PersonWithoutEmail.class, new File(rcfile_in, "persons_with_unknows.rc"));
    for (Person rec : records) {
        PersonWithoutEmail stripped = PersonWithoutEmail.newBuilder().mergeFrom(rec.toByteArray()).build();
        pweWritable.set(stripped);
        pweWriter.write(null, pweWritable);
    }
    pweWriter.close(null);

    // load all the files
    String loadQuery = String.format(
        "A = load '%s' using %s('%s');\n",
        rcfile_in.toURI().toString(),
        RCFileProtobufPigLoader.class.getName(),
        personClass);
    pigServer.registerQuery(loadQuery);

    // verify the result: one pass over the records per file written above
    Iterator<Tuple> loaded = pigServer.openIterator("A");
    for (int pass = 0; pass < 3; pass++) {
        for (Person rec : records) {
            Assert.assertEquals(personToString(rec), loaded.next().toString());
        }
    }

    // clean up on successful run
    FileUtil.fullyDelete(new File(testDir));
}
Also used : Writable(org.apache.hadoop.io.Writable) ProtobufWritable(com.twitter.elephantbird.mapreduce.io.ProtobufWritable) PersonWithoutEmail(com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) File(java.io.File) ProtobufBytesToTuple(com.twitter.elephantbird.pig.piggybank.ProtobufBytesToTuple) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 4 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class TestProtoToPig method testLazyProtoToPig.

/**
 * Compares a {@link ProtobufTuple} wrapping the fixture {@code Person}, and a
 * projection of that message built from {@code evenFields(...)}, against the fixture
 * tuple field by field. Fields whose fixture value is a {@link DataBag} are skipped.
 */
@Test
public void testLazyProtoToPig() throws ExecException {
    Person proto = Fixtures.buildPersonProto();
    Tuple wrappedTuple = new ProtobufTuple(proto);
    Tuple referenceTuple = Fixtures.buildPersonTuple();
    List<FieldDescriptor> fields = proto.getDescriptorForType().getFields();
    TypeRef<Person> personRef = PigUtil.getProtobufTypeRef(Person.class.getName());
    ProjectedProtobufTupleFactory<Person> projector =
        new ProjectedProtobufTupleFactory<Person>(personRef, evenFields(fields));
    Tuple projected = projector.newTuple(proto);

    // counts only the fields that were actually compared (bag fields excluded)
    int compared = 0;
    for (FieldDescriptor field : fields) {
        int pos = field.getIndex();
        // Bag-valued fields are skipped: per the original note, the conversion gives
        // non-null fields, which are not equal to the null fields in the fixture.
        if (!(referenceTuple.get(pos) instanceof DataBag)) {
            assertEquals(wrappedTuple.get(pos), referenceTuple.get(pos));
            // NOTE(review): parity is taken from the running count, not from pos; the
            // two agree only while no bag field precedes a non-bag field - confirm.
            if (compared % 2 == 0) {
                assertEquals(projected.get(pos / 2), referenceTuple.get(pos));
            }
            compared++;
        }
    }
}
Also used : DataBag(org.apache.pig.data.DataBag) ProtobufTuple(com.twitter.elephantbird.pig.util.ProtobufTuple) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) Tuple(org.apache.pig.data.Tuple) FieldDescriptor(com.google.protobuf.Descriptors.FieldDescriptor) Test(org.junit.Test)

Example 5 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class TestPigToProtobuf method testPerson.

/**
 * Checks that {@code PigToProtobuf.tupleToMessage} applied to a person tuple yields a
 * non-null message equal to the one {@code personMessage} produces for the same arguments.
 */
@Test
public void testPerson() {
    Person fromMessageHelper = personMessage("Joe", 1, null, "123-456-7890", "HOME");
    Person fromTuple = PigToProtobuf.tupleToMessage(
        Person.class,
        personTuple("Joe", 1, null, "123-456-7890", "HOME"));

    Assert.assertNotNull(fromTuple);
    Assert.assertEquals(fromMessageHelper, fromTuple);
}
Also used : Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) Test(org.junit.Test)

Aggregations

Person (com.twitter.data.proto.tutorial.AddressBookProtos.Person)12 Test (org.junit.Test)5 AddressBook (com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook)4 Tuple (org.apache.pig.data.Tuple)4 Before (org.junit.Before)3 ProtobufWritable (com.twitter.elephantbird.mapreduce.io.ProtobufWritable)2 ProtobufTuple (com.twitter.elephantbird.pig.util.ProtobufTuple)2 File (java.io.File)2 Configuration (org.apache.hadoop.conf.Configuration)2 BeforeClass (org.junit.BeforeClass)2 ByteString (com.google.protobuf.ByteString)1 FieldDescriptor (com.google.protobuf.Descriptors.FieldDescriptor)1 PhoneNumber (com.twitter.data.proto.tutorial.AddressBookProtos.Person.PhoneNumber)1 PersonWithoutEmail (com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail)1 ProtobufBlockWriter (com.twitter.elephantbird.mapreduce.io.ProtobufBlockWriter)1 ProtobufBytesToTuple (com.twitter.elephantbird.pig.piggybank.ProtobufBytesToTuple)1 ProtobufToPig (com.twitter.elephantbird.pig.util.ProtobufToPig)1 TypeRef (com.twitter.elephantbird.util.TypeRef)1 FileOutputStream (java.io.FileOutputStream)1 List (java.util.List)1