use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.
the class CrunchElephantBirdExample method run.
/**
 * Filters a collection of Person records down to those found in at least one AddressBook and
 * writes the distinct survivors to the output directory as protocol buffer records.
 *
 * @param args exactly three entries: the address book input path, the people input path and the
 *     output directory
 * @return 0 when the pipeline reports success; 1 on a usage error or a failed pipeline
 * @throws Exception propagated from the pipeline calls
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: <address_book.proto> <people.proto> <output_dir>");
    return 1;
  }

  // The address books are assumed to be small, so they are read into memory in the map tasks.
  Path addressBookPath = new Path(args[0]);
  PCollection<AddressBook> addresses =
      read(LzoProtobufSource.at(addressBookPath, AddressBook.class));

  // The (large) Person collection is filtered on the map side; the filter function is handed a
  // readable version of the address books to check membership against.
  Path peoplePath = new Path(args[1]);
  PCollection<Person> people = read(LzoProtobufSource.at(peoplePath, Person.class));
  PCollection<Person> found =
      people.filter(new PersonInAddressBookFn(addresses.asReadable(false)));

  // Only the distinct Person records that passed the filter are written out.
  write(distinct(found), new LzoProtobufTarget(new Path(args[2])));

  // Run the pipeline and translate its outcome into an exit code.
  if (done().succeeded()) {
    return 0;
  }
  return 1;
}
use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.
the class TestProtobufMultiFormatLoader method setUp.
/**
 * Prepares the test inputs: the same {@code records} are written twice through
 * {@code createLzoOut}, once as a protobuf block file and once as B64 lines.
 * The test is skipped via {@code Assume} when {@code CoreTestUtil} reports that LZO tests
 * cannot run.
 */
@Before
public void setUp() throws Exception {
  Configuration conf = new Configuration();
  boolean lzoTestsRunnable = CoreTestUtil.okToRunLzoTests(conf);
  Assume.assumeTrue(lzoTestsRunnable);

  pigServer = PigTestUtil.makePigServer();
  inputDir.mkdirs();

  // First input file: block format.
  ProtobufBlockWriter<Person> blockWriter =
      new ProtobufBlockWriter<Person>(createLzoOut("1-block.lzo", conf), Person.class);
  for (Person person : records) {
    blockWriter.write(person);
  }
  blockWriter.close();

  // A single writable is reused as the value holder for every B64 line.
  ProtobufWritable<Person> reusableValue = ProtobufWritable.newInstance(Person.class);

  // Second input file: B64 line format. The writer is given a null key and a null close argument.
  LzoBinaryB64LineRecordWriter<Person, ProtobufWritable<Person>> b64Writer =
      LzoBinaryB64LineRecordWriter.newProtobufWriter(
          Person.class, createLzoOut("2-b64.lzo", conf));
  for (Person person : records) {
    reusableValue.set(person);
    b64Writer.write(null, reusableValue);
  }
  b64Writer.close(null);
}
use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.
the class TimeProtoConversions method main.
/**
 * Times two ways of turning a Person proto into a Pig {@code Tuple} and reading field 0:
 * {@code ProtobufToPig.toTuple} versus constructing a {@code ProtobufTuple}. An untimed pass
 * over both runs first; each timed pass then prints its split time to standard error.
 *
 * @param args unused
 * @throws ExecException if reading a tuple field fails
 */
public static void main(String[] args) throws ExecException {
  final int iterations = 100000;
  ProtobufToPig protoConv = new ProtobufToPig();

  // Untimed pass that exercises both conversion paths before measuring.
  for (int warmup = 0; warmup < iterations; warmup++) {
    Person person = Fixtures.buildPersonProto();
    Tuple converted = protoConv.toTuple(person);
    converted.get(0);
    Tuple wrapped = new ProtobufTuple(person);
    wrapped.get(0);
  }

  StopWatch timer = new StopWatch();

  // Timed pass 1: ProtobufToPig.toTuple.
  timer.start();
  for (int run = 0; run < iterations; run++) {
    Person person = Fixtures.buildPersonProto();
    Tuple converted = protoConv.toTuple(person);
    converted.get(0);
  }
  timer.split();
  System.err.println(timer.getSplitTime());

  // Timed pass 2: ProtobufTuple constructor.
  timer.reset();
  timer.start();
  for (int run = 0; run < iterations; run++) {
    Person person = Fixtures.buildPersonProto();
    Tuple wrapped = new ProtobufTuple(person);
    wrapped.get(0);
  }
  timer.split();
  System.err.println(timer.getSplitTime());
}
use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.
the class ProtobufDeserializerTest method setUp.
/**
 * Builds the shared fixtures: an AddressBook ({@code test_ab}) holding three persons plus four
 * bytes of byte data, and a ProtobufDeserializer initialized with AddressBook as its
 * serialization class. The deserializer's object inspector is stored in {@code protobufOI}.
 *
 * @throws SerDeException if the deserializer cannot be initialized or inspected
 */
@Before
public void setUp() throws SerDeException {
  PhoneNumber homePhone =
      PhoneNumber.newBuilder().setNumber("pn0001").setType(PhoneType.HOME).build();
  PhoneNumber workPhone =
      PhoneNumber.newBuilder().setNumber("pn0002").setType(PhoneType.WORK).build();
  // Deliberately built without an explicit phone type.
  PhoneNumber untypedPhone = PhoneNumber.newBuilder().setNumber("pn0003").build();
  test_pn = PhoneNumber.newBuilder().setNumber("pn0004").setType(PhoneType.MOBILE).build();

  // Person 1: email and three phones. Person 2: one phone. Person 3: name and id only.
  Person first = Person.newBuilder()
      .setName("p1")
      .setId(1)
      .setEmail("p1@twitter")
      .addPhone(homePhone)
      .addPhone(workPhone)
      .addPhone(untypedPhone)
      .build();
  Person second = Person.newBuilder()
      .setName("p2")
      .setId(2)
      .addPhone(test_pn)
      .build();
  Person third = Person.newBuilder()
      .setName("p3")
      .setId(3)
      .build();

  test_ab = AddressBook.newBuilder()
      .addPerson(first)
      .addPerson(second)
      .addPerson(third)
      .setByteData(ByteString.copyFrom(new byte[] { 16, 32, 64, (byte) 128 }))
      .build();

  deserializer = new ProtobufDeserializer();
  Properties serdeProperties = new Properties();
  serdeProperties.setProperty(
      org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS, AddressBook.class.getName());
  deserializer.initialize(new Configuration(), serdeProperties);
  protobufOI = (ProtobufStructObjectInspector) deserializer.getObjectInspector();
}
use of com.twitter.data.proto.tutorial.AddressBookProtos.Person in project elephant-bird by twitter.
the class ProtobufDeserializerTest method testObjectInspector.
/**
 * Checks that the deserializer's object inspector is a struct inspector and that it exposes the
 * fixture AddressBook ({@code test_ab}) as two fields: the person list at index 0 and the byte
 * data at index 1.
 *
 * <p>All assertions use JUnit's {@code (expected, actual)} argument order so that failure
 * messages report the right values.
 *
 * @throws SerDeException if the object inspector cannot be obtained
 */
@Test
public final void testObjectInspector() throws SerDeException {
  ObjectInspector oi = deserializer.getObjectInspector();
  assertEquals(Category.STRUCT, oi.getCategory());

  // Named differently from the protobufOI field so the field is not shadowed.
  ProtobufStructObjectInspector structOI = (ProtobufStructObjectInspector) oi;
  List<Object> readData = structOI.getStructFieldsDataAsList(test_ab);
  assertEquals(2, readData.size());

  ByteString byteStr = (ByteString) readData.get(1);
  assertEquals(ByteString.copyFrom(new byte[] { 16, 32, 64, (byte) 128 }), byteStr);

  // The struct data is exposed as List<Object>, so this cast cannot be checked by the compiler.
  @SuppressWarnings("unchecked") List<Person> persons = (List<Person>) readData.get(0);
  assertEquals(3, persons.size());

  assertEquals(3, persons.get(0).getPhoneCount());
  // NOTE(review): the third phone is built without a type in setUp; HOME is presumably the
  // default declared in the .proto - confirm.
  assertEquals(PhoneType.HOME, persons.get(0).getPhone(2).getType());
  assertEquals(1, persons.get(0).getId());

  assertEquals(1, persons.get(1).getPhoneCount());
  assertEquals(test_pn, persons.get(1).getPhone(0));
  assertEquals(PhoneType.MOBILE, persons.get(1).getPhone(0).getType());

  assertEquals(0, persons.get(2).getPhoneCount());
  assertEquals(3, persons.get(2).getId());
  // No email was set on the third person; the getter is expected to return the empty string.
  assertEquals("", persons.get(2).getEmail());
}
Aggregations