Use of com.twitter.elephantbird.mapreduce.io.ProtobufWritable in the elephant-bird project by Twitter.
Class TestRCFileProtobufStorage, method testRCFileStorage.
/**
 * Stores the test records through RCFileProtobufPigStorage, adds two more RCFiles
 * written directly from protobuf messages, then loads the whole directory with
 * RCFileProtobufPigLoader and expects the record list to come back three times.
 */
@Test
public void testRCFileStorage() throws Exception {
  // Step 1: let Pig decode the base64 input and store it with RCFileProtobufPigStorage.
  String storeScript = String.format(
      "DEFINE b64ToTuple %s('%s');\n"
          + "A = load '%s' as (line);\n"
          + "A = foreach A generate b64ToTuple(line) as t;\n"
          + "A = foreach A generate FLATTEN(t);\n"
          + "STORE A into '%s' using %s('%s');\n",
      B64ToTuple.class.getName(),
      Person.class.getName(),
      inputDir.toURI().toString(),
      rcfile_in.toURI().toString(),
      RCFileProtobufPigStorage.class.getName(),
      Person.class.getName());
  // The script is registered one statement (one line) at a time.
  String[] statements = storeScript.split("\n");
  for (int s = 0; s < statements.length; s++) {
    pigServer.registerQuery(statements[s] + "\n");
  }

  // Step 2: write Person messages straight into an RCFile, skipping the tuple
  // conversion, so that optional fields left unset are null in the RCFile.
  ProtobufWritable<Person> personHolder = ProtobufWritable.newInstance(Person.class);
  File unsetFieldsFile = new File(rcfile_in, "persons_with_unset_fields.rc");
  RecordWriter<Writable, Writable> unsetFieldsWriter =
      createProtoWriter(Person.class, unsetFieldsFile);
  for (Person source : records) {
    personHolder.set(source);
    unsetFieldsWriter.write(null, personHolder);
  }
  unsetFieldsWriter.close(null);

  // Step 3: write the same records re-parsed as PersonWithoutEmail, to exercise
  // unknown-field handling when they are later loaded as Person.
  ProtobufWritable<PersonWithoutEmail> strippedHolder =
      ProtobufWritable.newInstance(PersonWithoutEmail.class);
  File unknownsFile = new File(rcfile_in, "persons_with_unknows.rc");
  RecordWriter<Writable, Writable> unknownsWriter =
      createProtoWriter(PersonWithoutEmail.class, unknownsFile);
  for (Person source : records) {
    byte[] serialized = source.toByteArray();
    PersonWithoutEmail stripped =
        PersonWithoutEmail.newBuilder().mergeFrom(serialized).build();
    strippedHolder.set(stripped);
    unknownsWriter.write(null, strippedHolder);
  }
  unknownsWriter.close(null);

  // Step 4: load everything under rcfile_in as Person.
  String loadQuery = String.format(
      "A = load '%s' using %s('%s');\n",
      rcfile_in.toURI().toString(),
      RCFileProtobufPigLoader.class.getName(),
      Person.class.getName());
  pigServer.registerQuery(loadQuery);

  // Step 5: the loaded rows must be the record list repeated once per input
  // written above (Pig store output plus the two hand-written files).
  Iterator<Tuple> loaded = pigServer.openIterator("A");
  final int expectedCopies = 3;
  int pass = 0;
  while (pass < expectedCopies) {
    for (Person person : records) {
      Assert.assertEquals(personToString(person), loaded.next().toString());
    }
    pass++;
  }

  // Only reached when every assertion passed, so output survives a failed run.
  FileUtil.fullyDelete(new File(testDir));
}
Use of com.twitter.elephantbird.mapreduce.io.ProtobufWritable in the elephant-bird project by Twitter.
Class TestProtobufMultiFormatLoader, method setUp.
/**
 * Builds the input fixtures: skips the test when CoreTestUtil reports that LZO
 * tests cannot run, creates the Pig server and input directory, then writes the
 * records once as a protobuf block file and once as base64-encoded lines.
 */
@Before
public void setUp() throws Exception {
  Configuration conf = new Configuration();
  boolean lzoUsable = CoreTestUtil.okToRunLzoTests(conf);
  Assume.assumeTrue(lzoUsable);

  pigServer = PigTestUtil.makePigServer();
  inputDir.mkdirs();

  // Fixture 1: every record in protobuf block format.
  ProtobufBlockWriter<Person> blockWriter =
      new ProtobufBlockWriter<Person>(createLzoOut("1-block.lzo", conf), Person.class);
  for (Person person : records) {
    blockWriter.write(person);
  }
  blockWriter.close();

  // Fixture 2: every record as a base64 line, passed through one reusable writable.
  ProtobufWritable<Person> holder = ProtobufWritable.newInstance(Person.class);
  LzoBinaryB64LineRecordWriter<Person, ProtobufWritable<Person>> base64Writer =
      LzoBinaryB64LineRecordWriter.newProtobufWriter(
          Person.class, createLzoOut("2-b64.lzo", conf));
  for (Person person : records) {
    holder.set(person);
    base64Writer.write(null, holder);
  }
  base64Writer.close(null);
}
Use of com.twitter.elephantbird.mapreduce.io.ProtobufWritable in the elephant-bird project by Twitter.
Class TestProtobufWritable, method testReadWrite.
/**
 * Serializes {@code referenceAbWritable} to a file, reads it back into a fresh
 * {@code ProtobufWritable<AddressBook>}, and checks that both the decoded message
 * and the writable's hash code match the reference.
 *
 * <p>The scratch file is a uniquely named temp file that is removed afterwards,
 * and both streams are closed even when a write, read or assertion fails, so the
 * test leaves nothing behind in the working directory.
 *
 * @throws IOException if the scratch file cannot be created, written or read
 */
@Test
public void testReadWrite() throws IOException {
  // Fully qualified so the block does not depend on an extra import.
  java.io.File scratch = java.io.File.createTempFile("protobuf-writable-test", ".bin");
  try {
    DataOutputStream dos = new DataOutputStream(new FileOutputStream(scratch));
    try {
      referenceAbWritable.write(dos);
    } finally {
      dos.close();
    }

    ProtobufWritable<AddressBook> after =
        new ProtobufWritable<AddressBook>(new TypeRef<AddressBook>() {
        });
    DataInputStream dis = new DataInputStream(new FileInputStream(scratch));
    try {
      after.readFields(dis);
    } finally {
      dis.close();
    }

    AddressBook ab2 = after.get();
    assertEquals(referenceAb, ab2);
    assertEquals(referenceAbWritable.hashCode(), after.hashCode());
  } finally {
    // Best-effort cleanup; a failed delete must not mask the test result.
    scratch.delete();
  }
}
Aggregations