Use of org.apache.crunch.test.Employee in the crunch project by cloudera.
Class MultiAvroSchemaJoinTest, method setUp.
/**
 * Creates the two temporary Avro data files that the tests read as input.
 *
 * <p>The person file receives three {@code Person} records (Josh, Kate and Mike) and the
 * employee file receives a single {@code Employee} record (Kate), so "Kate" is the only
 * name that appears in both files.
 *
 * @throws Exception if a temp file cannot be created or an Avro write fails
 */
@Before
public void setUp() throws Exception {
  this.personFile = File.createTempFile("person", ".avro");
  this.employeeFile = File.createTempFile("employee", ".avro");

  DatumWriter<Person> pdw = new SpecificDatumWriter<Person>();
  DataFileWriter<Person> pfw = new DataFileWriter<Person>(pdw);
  // create() stays outside the try so that close() is only ever called on an opened writer.
  pfw.create(Person.SCHEMA$, personFile);
  try {
    Person p1 = new Person();
    p1.setName("Josh");
    p1.setAge(19);
    p1.setSiblingnames(ImmutableList.<CharSequence>of("Kate", "Mike"));
    pfw.append(p1);

    Person p2 = new Person();
    p2.setName("Kate");
    p2.setAge(17);
    p2.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Mike"));
    pfw.append(p2);

    Person p3 = new Person();
    p3.setName("Mike");
    p3.setAge(12);
    p3.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Kate"));
    pfw.append(p3);
  } finally {
    // Release the file handle even when an append fails part-way through.
    pfw.close();
  }

  DatumWriter<Employee> edw = new SpecificDatumWriter<Employee>();
  DataFileWriter<Employee> efw = new DataFileWriter<Employee>(edw);
  efw.create(Employee.SCHEMA$, employeeFile);
  try {
    Employee e1 = new Employee();
    e1.setName("Kate");
    e1.setSalary(100000);
    e1.setDepartment("Marketing");
    efw.append(e1);
  } finally {
    efw.close();
  }
}
Use of org.apache.crunch.test.Employee in the crunch project by cloudera.
Class MultiAvroSchemaJoinTest, method testJoin.
/**
 * Reads the person and employee Avro files, keys both collections with {@code NameFn},
 * joins them, and checks that exactly one pair comes back and that both sides of that
 * pair are named "Kate".
 *
 * @throws Exception if the pipeline fails to run or its output cannot be read
 */
@Test
public void testJoin() throws Exception {
  Pipeline p = new MRPipeline(MultiAvroSchemaJoinTest.class);
  try {
    PCollection<Person> people =
        p.read(From.avroFile(personFile.getAbsolutePath(), records(Person.class)));
    PCollection<Employee> employees =
        p.read(From.avroFile(employeeFile.getAbsolutePath(), records(Employee.class)));

    Iterable<Pair<Person, Employee>> result = people
        .by(new NameFn<Person>(), strings())
        .join(employees.by(new NameFn<Employee>(), strings()))
        .values()
        .materialize();

    // Copy the materialized output into memory before the pipeline is finished below.
    List<Pair<Person, Employee>> v = Lists.newArrayList(result);

    assertEquals(1, v.size());
    assertEquals("Kate", v.get(0).first().getName().toString());
    assertEquals("Kate", v.get(0).second().getName().toString());
  } finally {
    // Finish the pipeline so intermediate data created for this run is not left behind
    // (per the Crunch Pipeline.done() contract).
    p.done();
  }
}
Use of org.apache.crunch.test.Employee in the crunch project by cloudera.
Class AggregateTest, method testCollectValues_Avro.
/**
 * Runs {@code collectValues()} on an Avro-typed table of (int, Employee) pairs built from
 * "set2.txt" and checks that the resulting map has a single key whose collected values
 * equal the employees produced for "c", "d" and "a", in that order.
 *
 * @throws IOException if the temporary copy of the input file cannot be created
 */
@Test
public void testCollectValues_Avro() throws IOException {
  MapStringToEmployeePair mapFn = new MapStringToEmployeePair();
  Pipeline pipeline = new MRPipeline(AggregateTest.class);
  try {
    Map<Integer, Collection<Employee>> collectionMap = pipeline
        .readTextFile(FileHelper.createTempCopyOf("set2.txt"))
        .parallelDo(mapFn, Avros.tableOf(Avros.ints(), Avros.records(Employee.class)))
        .collectValues()
        .materializeToMap();

    // NOTE(review): the returned map may be lazily backed by pipeline output, so every
    // read of it is kept inside this try block, ahead of done() -- confirm.
    assertEquals(1, collectionMap.size());

    Employee empC = mapFn.map("c").second();
    Employee empD = mapFn.map("d").second();
    Employee empA = mapFn.map("a").second();
    assertEquals(Lists.newArrayList(empC, empD, empA), collectionMap.get(1));
  } finally {
    // Finish the pipeline so intermediate data created for this run is not left behind
    // (per the Crunch Pipeline.done() contract).
    pipeline.done();
  }
}
Aggregations