Example use of org.apache.crunch.Pipeline in the Crunch project by Cloudera.
Class AggregateTest, method testCollectUrls.
/**
 * Checks that {@code Aggregate.collectValues} gathers the expected number of
 * values under each key produced by running {@code SplitFn} over the lines of
 * {@code urls.txt}.
 *
 * <p>Any key not explicitly listed in the checks below is expected to map to
 * an empty collection (expected size 0).
 *
 * @throws Exception if copying the input fixture or running the pipeline fails
 */
@Test
public void testCollectUrls() throws Exception {
  Pipeline p = new MRPipeline(AggregateTest.class);
  try {
    String urlsInputPath = FileHelper.createTempCopyOf("urls.txt");
    PTable<String, Collection<String>> urls = Aggregate.collectValues(
        p.readTextFile(urlsInputPath).parallelDo(new SplitFn(), tableOf(strings(), strings())));
    for (Pair<String, Collection<String>> e : urls.materialize()) {
      String key = e.first();
      int expectedSize = 0;
      if ("www.A.com".equals(key)) {
        expectedSize = 4;
      } else if ("www.B.com".equals(key) || "www.F.com".equals(key)) {
        expectedSize = 2;
      } else if ("www.C.com".equals(key) || "www.D.com".equals(key) || "www.E.com".equals(key)) {
        expectedSize = 1;
      }
      assertEquals("Checking key = " + key, expectedSize, e.second().size());
    }
  } finally {
    // Finish the pipeline exactly once, and only after the materialized
    // output has been fully consumed. Previously done() was invoked inside
    // the loop: once per entry while the iteration was still in progress,
    // and not at all when no entries were produced.
    p.done();
  }
}
Example use of org.apache.crunch.Pipeline in the Crunch project by Cloudera.
Class AggregateTest, method testCollectValues_Writables.
/**
 * Reads {@code set2.txt}, maps each line to an (int, Text) pair with
 * {@code MapStringToTextPair} using Writable-based types, collects the values
 * per key, and checks the resulting map: a single key, {@code 1}, holding the
 * values "c", "d", "a" in that order.
 *
 * @throws IOException if the input fixture cannot be copied
 */
@Test
public void testCollectValues_Writables() throws IOException {
  Pipeline pipeline = new MRPipeline(AggregateTest.class);
  String inputPath = FileHelper.createTempCopyOf("set2.txt");

  Map<Integer, Collection<Text>> valuesByKey = pipeline
      .readTextFile(inputPath)
      .parallelDo(
          new MapStringToTextPair(),
          Writables.tableOf(Writables.ints(), Writables.writables(Text.class)))
      .collectValues()
      .materializeToMap();

  assertEquals(1, valuesByKey.size());

  Collection<Text> expectedValues =
      Lists.newArrayList(new Text("c"), new Text("d"), new Text("a"));
  assertEquals(expectedValues, valuesByKey.get(1));
}
Example use of org.apache.crunch.Pipeline in the Crunch project by Cloudera.
Class AvroFileSourceTargetTest, method testReflect.
/**
 * Builds a generic record against the reflect-derived schema of
 * {@code PojoPerson}, passes it to {@code populateGenericFile}, then reads
 * {@code avroFile} through the pipeline with a reflect-based Avro type and
 * checks that exactly one {@code PojoPerson} named "John Doe" comes back.
 *
 * @throws IOException declared for the fixture setup and pipeline read
 */
@Test
public void testReflect() throws IOException {
  Schema reflectSchema = ReflectData.get().getSchema(PojoPerson.class);
  GenericRecord johnDoe = new GenericData.Record(reflectSchema);
  johnDoe.put("name", "John Doe");
  populateGenericFile(Lists.newArrayList(johnDoe), reflectSchema);

  Pipeline pipeline = new MRPipeline(AvroFileSourceTargetTest.class);
  PCollection<PojoPerson> people = pipeline.read(
      At.avroFile(avroFile.getAbsolutePath(), Avros.reflects(PojoPerson.class)));
  List<PojoPerson> materialized = Lists.newArrayList(people.materialize());

  assertEquals(1, materialized.size());
  assertEquals("John Doe", materialized.get(0).getName());
}
Example use of org.apache.crunch.Pipeline in the Crunch project by Cloudera.
Class AvroFileSourceTargetTest, method testSpecific.
/**
 * Builds a generic record against {@code Person.SCHEMA$} (name, age and
 * sibling names), passes it to {@code populateGenericFile}, then reads
 * {@code avroFile} through the pipeline as {@code Person} records and checks
 * that the result equals a single {@code Person} with the same field values.
 *
 * @throws IOException declared for the fixture setup and pipeline read
 */
@Test
public void testSpecific() throws IOException {
  GenericRecord stored = new GenericData.Record(Person.SCHEMA$);
  stored.put("name", "John Doe");
  stored.put("age", 42);
  stored.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
  populateGenericFile(Lists.newArrayList(stored), Person.SCHEMA$);

  Pipeline pipeline = new MRPipeline(AvroFileSourceTargetTest.class);
  PCollection<Person> people = pipeline.read(
      At.avroFile(avroFile.getAbsolutePath(), Avros.records(Person.class)));
  List<Person> actual = Lists.newArrayList(people.materialize());

  // Expected record mirrors the values stored in the generic record above.
  Person expected = new Person();
  expected.setName("John Doe");
  expected.setAge(42);
  List<CharSequence> siblings = Lists.<CharSequence>newArrayList("Jimmy", "Jane");
  expected.setSiblingnames(siblings);

  assertEquals(Lists.newArrayList(expected), actual);
}
Aggregations