Search in sources :

Example 6 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class CrunchElephantBirdExample method run.

/**
 * Builds and runs the pipeline that keeps only the Person records found in at least one
 * AddressBook record and writes the distinct survivors as protocol buffer records.
 *
 * @param args exactly three entries: the address book input, the people input and the
 *             output directory
 * @return 0 when the pipeline reports success, 1 on a usage error or pipeline failure
 * @throws Exception if building or executing the pipeline fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: <address_book.proto> <people.proto> <output_dir>");
        return 1;
    }

    // Reference to the address books; they are assumed to be small and are read into
    // memory by the map tasks.
    Path addressBookPath = new Path(args[0]);
    PCollection<AddressBook> addressBooks =
            read(LzoProtobufSource.at(addressBookPath, AddressBook.class));

    // The (large) collection of Person records to be filtered.
    Path peoplePath = new Path(args[1]);
    PCollection<Person> everyone = read(LzoProtobufSource.at(peoplePath, Person.class));

    // Keep only the people that appear in some address book. A readable version of the
    // address books is handed to the filter function so the check can happen map-side.
    PCollection<Person> listed =
            everyone.filter(new PersonInAddressBookFn(addressBooks.asReadable(false)));

    // Emit each surviving Person once, as protocol buffer records, into the output directory.
    write(distinct(listed), new LzoProtobufTarget(new Path(args[2])));

    // Run the pipeline and translate its outcome into a process exit code.
    if (done().succeeded()) {
        return 0;
    }
    return 1;
}
Also used : Path(org.apache.hadoop.fs.Path) AddressBook(com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person)

Example 7 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class TestProtobufMultiFormatLoader method setUp.

/**
 * Creates the Pig server and writes every entry of {@code records} into two input files:
 * a protobuf block file ("1-block.lzo") and a B64 line file ("2-b64.lzo").
 * Setup only proceeds when {@code CoreTestUtil.okToRunLzoTests} holds for the configuration.
 *
 * @throws Exception if creating the Pig server or writing either file fails
 */
@Before
public void setUp() throws Exception {
    Configuration conf = new Configuration();
    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));

    pigServer = PigTestUtil.makePigServer();
    inputDir.mkdirs();

    // First input: all records in the block format.
    ProtobufBlockWriter<Person> blockWriter =
            new ProtobufBlockWriter<Person>(createLzoOut("1-block.lzo", conf), Person.class);
    for (Person person : records) {
        blockWriter.write(person);
    }
    blockWriter.close();

    // Second input: all records as B64 lines. A single writable is reused as the
    // carrier for each record.
    ProtobufWritable<Person> carrier = ProtobufWritable.newInstance(Person.class);
    LzoBinaryB64LineRecordWriter<Person, ProtobufWritable<Person>> lineWriter =
            LzoBinaryB64LineRecordWriter.newProtobufWriter(Person.class, createLzoOut("2-b64.lzo", conf));
    for (Person person : records) {
        carrier.set(person);
        // No key is supplied; only the value is passed to the writer.
        lineWriter.write(null, carrier);
    }
    lineWriter.close(null);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ProtobufBlockWriter(com.twitter.elephantbird.mapreduce.io.ProtobufBlockWriter) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) ProtobufWritable(com.twitter.elephantbird.mapreduce.io.ProtobufWritable) Before(org.junit.Before)

Example 8 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class TimeProtoConversions method main.

/**
 * Times two ways of turning a Person protobuf into a Pig {@code Tuple}:
 * {@code ProtobufToPig.toTuple} and wrapping the message in a {@code ProtobufTuple}.
 * An untimed pass exercises both paths first; each path is then timed separately and
 * its split time is printed to standard error.
 *
 * @param args command-line arguments; not read by this method
 * @throws ExecException if one of the conversion or tuple-access calls fails with it
 */
public static void main(String[] args) throws ExecException {
    final int rounds = 100000;
    ProtobufToPig converter = new ProtobufToPig();

    // Untimed pass over both conversion paths.
    for (int round = 0; round < rounds; round++) {
        Person person = Fixtures.buildPersonProto();
        Tuple converted = converter.toTuple(person);
        converted.get(0);
        Tuple wrapped = new ProtobufTuple(person);
        wrapped.get(0);
    }

    StopWatch stopWatch = new StopWatch();

    // Timed pass 1: conversion through ProtobufToPig.
    stopWatch.start();
    for (int round = 0; round < rounds; round++) {
        Person person = Fixtures.buildPersonProto();
        Tuple converted = converter.toTuple(person);
        converted.get(0);
    }
    stopWatch.split();
    System.err.println(stopWatch.getSplitTime());

    // Timed pass 2: wrapping in a ProtobufTuple. The watch is reset so the second
    // measurement starts from zero.
    stopWatch.reset();
    stopWatch.start();
    for (int round = 0; round < rounds; round++) {
        Person person = Fixtures.buildPersonProto();
        Tuple wrapped = new ProtobufTuple(person);
        wrapped.get(0);
    }
    stopWatch.split();
    System.err.println(stopWatch.getSplitTime());
}
Also used : ProtobufToPig(com.twitter.elephantbird.pig.util.ProtobufToPig) ProtobufTuple(com.twitter.elephantbird.pig.util.ProtobufTuple) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) Tuple(org.apache.pig.data.Tuple) StopWatch(org.apache.commons.lang.time.StopWatch)

Example 9 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class ProtobufDeserializerTest method setUp.

/**
 * Builds the fixture shared by the tests: an AddressBook ({@code test_ab}) holding three
 * persons plus a byte payload, a standalone phone number ({@code test_pn}), and a
 * {@code ProtobufDeserializer} initialized for the AddressBook class together with its
 * object inspector.
 *
 * @throws SerDeException if the deserializer cannot be initialized or queried
 */
@Before
public void setUp() throws SerDeException {
    // Phone numbers for the first person: two with an explicit type, one without.
    PhoneNumber homePhone = PhoneNumber.newBuilder()
            .setNumber("pn0001")
            .setType(PhoneType.HOME)
            .build();
    PhoneNumber workPhone = PhoneNumber.newBuilder()
            .setNumber("pn0002")
            .setType(PhoneType.WORK)
            .build();
    PhoneNumber untypedPhone = PhoneNumber.newBuilder()
            .setNumber("pn0003")
            .build();
    test_pn = PhoneNumber.newBuilder()
            .setNumber("pn0004")
            .setType(PhoneType.MOBILE)
            .build();

    // Three persons: one fully populated, one with a single phone, one with only name and id.
    Person fullPerson = Person.newBuilder()
            .setName("p1")
            .setId(1)
            .setEmail("p1@twitter")
            .addPhone(homePhone)
            .addPhone(workPhone)
            .addPhone(untypedPhone)
            .build();
    Person singlePhonePerson = Person.newBuilder()
            .setName("p2")
            .setId(2)
            .addPhone(test_pn)
            .build();
    Person barePerson = Person.newBuilder()
            .setName("p3")
            .setId(3)
            .build();

    byte[] payload = new byte[] { 16, 32, 64, (byte) 128 };
    test_ab = AddressBook.newBuilder()
            .addPerson(fullPerson)
            .addPerson(singlePhonePerson)
            .addPerson(barePerson)
            .setByteData(ByteString.copyFrom(payload))
            .build();

    // Point the deserializer at the AddressBook class through the serialization-class property.
    deserializer = new ProtobufDeserializer();
    Properties tableProperties = new Properties();
    tableProperties.setProperty(
            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS,
            AddressBook.class.getName());
    deserializer.initialize(new Configuration(), tableProperties);
    protobufOI = (ProtobufStructObjectInspector) deserializer.getObjectInspector();
}
Also used : AddressBook(com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook) Configuration(org.apache.hadoop.conf.Configuration) PhoneNumber(com.twitter.data.proto.tutorial.AddressBookProtos.Person.PhoneNumber) Properties(java.util.Properties) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) Before(org.junit.Before)

Example 10 with Person

use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.

the class ProtobufDeserializerTest method testObjectInspector.

/**
 * Checks that the deserializer's object inspector is a struct inspector and that reading
 * {@code test_ab} through it yields two fields: the list of persons at index 0 and the
 * byte data at index 1, with the person and phone values matching the fixture.
 *
 * @throws SerDeException if the object inspector cannot be obtained
 */
@Test
public final void testObjectInspector() throws SerDeException {
    ObjectInspector oi = deserializer.getObjectInspector();
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the values the right way round.
    assertEquals(Category.STRUCT, oi.getCategory());

    // Named differently from the protobufOI field so the local does not shadow it.
    ProtobufStructObjectInspector structOI = (ProtobufStructObjectInspector) oi;
    List<Object> readData = structOI.getStructFieldsDataAsList(test_ab);
    assertEquals(2, readData.size());

    ByteString byteStr = (ByteString) readData.get(1);
    assertEquals(ByteString.copyFrom(new byte[] { 16, 32, 64, (byte) 128 }), byteStr);

    // Casting Object to List<Person> is the unchecked operation in this test, so the
    // suppression belongs on this declaration only.
    @SuppressWarnings("unchecked")
    List<Person> persons = (List<Person>) readData.get(0);
    assertEquals(3, persons.size());

    // First person: three phone numbers.
    assertEquals(3, persons.get(0).getPhoneCount());
    // NOTE(review): the third phone is expected to report HOME; presumably that is the
    // default phone type in the .proto definition — confirm.
    assertEquals(PhoneType.HOME, persons.get(0).getPhone(2).getType());
    assertEquals(1, persons.get(0).getId());

    // Second person: exactly the standalone fixture phone number.
    assertEquals(1, persons.get(1).getPhoneCount());
    assertEquals(test_pn, persons.get(1).getPhone(0));
    assertEquals(PhoneType.MOBILE, persons.get(1).getPhone(0).getType());

    // Third person: no phones, and the email getter is expected to return an empty string.
    assertEquals(0, persons.get(2).getPhoneCount());
    assertEquals(3, persons.get(2).getId());
    assertEquals("", persons.get(2).getEmail());
}
Also used : StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ByteString(com.google.protobuf.ByteString) List(java.util.List) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) Test(org.junit.Test)

Aggregations

Person (com.twitter.data.proto.tutorial.AddressBookProtos.Person)12 Test (org.junit.Test)5 AddressBook (com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook)4 Tuple (org.apache.pig.data.Tuple)4 Before (org.junit.Before)3 ProtobufWritable (com.twitter.elephantbird.mapreduce.io.ProtobufWritable)2 ProtobufTuple (com.twitter.elephantbird.pig.util.ProtobufTuple)2 File (java.io.File)2 Configuration (org.apache.hadoop.conf.Configuration)2 BeforeClass (org.junit.BeforeClass)2 ByteString (com.google.protobuf.ByteString)1 FieldDescriptor (com.google.protobuf.Descriptors.FieldDescriptor)1 PhoneNumber (com.twitter.data.proto.tutorial.AddressBookProtos.Person.PhoneNumber)1 PersonWithoutEmail (com.twitter.data.proto.tutorial.AddressBookProtos.PersonWithoutEmail)1 ProtobufBlockWriter (com.twitter.elephantbird.mapreduce.io.ProtobufBlockWriter)1 ProtobufBytesToTuple (com.twitter.elephantbird.pig.piggybank.ProtobufBytesToTuple)1 ProtobufToPig (com.twitter.elephantbird.pig.util.ProtobufToPig)1 TypeRef (com.twitter.elephantbird.util.TypeRef)1 FileOutputStream (java.io.FileOutputStream)1 List (java.util.List)1