Search in sources :

Example 6 with Person

use of org.apache.crunch.test.Person in project crunch by cloudera.

the class AvroTypeTest method testGetDetachedValue_ReflectAvroType.

/**
 * Verifies that {@code getDetachedValue} on a reflection-based {@code AvroType}
 * returns an object that is equal to the input but is not the same instance.
 */
@Test
public void testGetDetachedValue_ReflectAvroType() {
    // Fixture: a fully populated Person with an empty sibling list.
    Person original = new Person();
    original.setName("name value");
    original.setAge(42);
    original.setSiblingnames(Lists.<CharSequence>newArrayList());

    AvroType<Person> personType = Avros.reflects(Person.class);
    Person copy = personType.getDetachedValue(original);

    // Equal by value, distinct by identity.
    assertEquals(original, copy);
    assertNotSame(original, copy);
}
Also used : Person(org.apache.crunch.test.Person) Test(org.junit.Test)

Example 7 with Person

use of org.apache.crunch.test.Person in project crunch by cloudera.

the class AvroFileReaderFactoryTest method testRead_SpecificReader.

/**
 * Hands a single generic record (built against {@code Person.SCHEMA$}) to
 * {@code populateGenericFile}, reads {@code this.avroFile} through an
 * {@code AvroFileReaderFactory} created from {@code Avros.records(Person.class)},
 * and checks that exactly one {@code Person} equal to the expected one comes back.
 *
 * @throws IOException declared by the signature; propagated from the file system / reader calls
 */
@Test
public void testRead_SpecificReader() throws IOException {
    // Input: one generic record matching the Person schema.
    GenericRecord written = new GenericData.Record(Person.SCHEMA$);
    written.put("name", "John Doe");
    written.put("age", 42);
    written.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(written), Person.SCHEMA$);

    // Read the file back using the specific-record type.
    AvroFileReaderFactory<Person> specificReader =
            new AvroFileReaderFactory<Person>(Avros.records(Person.class), new Configuration());
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path avroPath = new Path(this.avroFile.getAbsolutePath());
    Iterator<Person> people = specificReader.read(localFs, avroPath);

    // Expected value mirrors the fields put into the generic record above.
    Person expected = new Person();
    expected.setAge(42);
    expected.setName("John Doe");
    expected.setSiblingnames(Lists.<CharSequence>newArrayList("Jimmy", "Jane"));

    Person actual = people.next();
    assertEquals(expected, actual);
    // Only one record was supplied, so the iterator must now be exhausted.
    assertFalse(people.hasNext());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Person(org.apache.crunch.test.Person) Test(org.junit.Test)

Example 8 with Person

use of org.apache.crunch.test.Person in project crunch by cloudera.

the class AvroFileSourceTest method testConfigureJob_SpecificData.

/**
 * Configures an {@code AvroFileSource} of specific {@code Person} records on the
 * job and checks the resulting job configuration: {@code AvroJob.INPUT_IS_REFLECT}
 * must read as false (the lookup defaults to true), and {@code AvroJob.INPUT_SCHEMA}
 * must equal the string form of {@code Person.SCHEMA$}.
 *
 * @throws IOException declared by the signature; propagated from source configuration
 */
@Test
public void testConfigureJob_SpecificData() throws IOException {
    AvroType<Person> specificType = Avros.records(Person.class);
    Path inputPath = new Path(tempFile.getAbsolutePath());
    AvroFileSource<Person> source = new AvroFileSource<Person>(inputPath, specificType);
    source.configureSource(job, -1);

    // Default of true means the assertion only passes if the flag was explicitly set to false.
    boolean reflectFlag = job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true);
    assertFalse(reflectFlag);

    String expectedSchema = Person.SCHEMA$.toString();
    String configuredSchema = job.getConfiguration().get(AvroJob.INPUT_SCHEMA);
    assertEquals(expectedSchema, configuredSchema);
}
Also used : Path(org.apache.hadoop.fs.Path) Person(org.apache.crunch.test.Person) Test(org.junit.Test)

Example 9 with Person

use of org.apache.crunch.test.Person in project crunch by cloudera.

the class MultiAvroSchemaJoinTest method setUp.

/**
 * Creates two temporary Avro data files used by the tests in this class:
 * a person file holding three {@code Person} records (Josh, Kate, Mike) and an
 * employee file holding a single {@code Employee} record (Kate).
 *
 * <p>Each {@code DataFileWriter} is closed in a {@code finally} block so that the
 * underlying file handle is released even when an {@code append} call fails.
 * {@code create} is deliberately kept outside the {@code try} so that
 * {@code close()} is only invoked on a writer that was successfully opened.
 *
 * @throws Exception if a temp file cannot be created or a record cannot be written
 */
@Before
public void setUp() throws Exception {
    this.personFile = File.createTempFile("person", ".avro");
    this.employeeFile = File.createTempFile("employee", ".avro");

    // --- Person records ---
    DatumWriter<Person> pdw = new SpecificDatumWriter<Person>();
    DataFileWriter<Person> pfw = new DataFileWriter<Person>(pdw);
    pfw.create(Person.SCHEMA$, personFile);
    try {
        Person p1 = new Person();
        p1.setName("Josh");
        p1.setAge(19);
        p1.setSiblingnames(ImmutableList.<CharSequence>of("Kate", "Mike"));
        pfw.append(p1);

        Person p2 = new Person();
        p2.setName("Kate");
        p2.setAge(17);
        p2.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Mike"));
        pfw.append(p2);

        Person p3 = new Person();
        p3.setName("Mike");
        p3.setAge(12);
        p3.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Kate"));
        pfw.append(p3);
    } finally {
        // Always release the file handle, even if an append above threw.
        pfw.close();
    }

    // --- Employee records ---
    DatumWriter<Employee> edw = new SpecificDatumWriter<Employee>();
    DataFileWriter<Employee> efw = new DataFileWriter<Employee>(edw);
    efw.create(Employee.SCHEMA$, employeeFile);
    try {
        // "Kate" is the only name that also appears in the person file.
        Employee e1 = new Employee();
        e1.setName("Kate");
        e1.setSalary(100000);
        e1.setDepartment("Marketing");
        efw.append(e1);
    } finally {
        efw.close();
    }
}
Also used : Employee(org.apache.crunch.test.Employee) DataFileWriter(org.apache.avro.file.DataFileWriter) Person(org.apache.crunch.test.Person) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) Before(org.junit.Before)

Example 10 with Person

use of org.apache.crunch.test.Person in project crunch by cloudera.

the class MultiAvroSchemaJoinTest method testJoin.

/**
 * Reads the person and employee Avro files into two collections, keys both by
 * {@code NameFn} with string keys, joins them, and materializes the joined values.
 * Asserts that exactly one pair results and that both sides of it are named "Kate".
 *
 * @throws Exception declared by the signature; propagated from the pipeline calls
 */
@Test
public void testJoin() throws Exception {
    Pipeline pipeline = new MRPipeline(MultiAvroSchemaJoinTest.class);

    PCollection<Person> persons =
            pipeline.read(From.avroFile(personFile.getAbsolutePath(), records(Person.class)));
    PCollection<Employee> staff =
            pipeline.read(From.avroFile(employeeFile.getAbsolutePath(), records(Employee.class)));

    // Key each side by name, join on that key, and keep only the joined value pairs.
    Iterable<Pair<Person, Employee>> joined = persons
            .by(new NameFn<Person>(), strings())
            .join(staff.by(new NameFn<Employee>(), strings()))
            .values()
            .materialize();

    List<Pair<Person, Employee>> matches = Lists.newArrayList(joined);
    assertEquals(1, matches.size());

    // Fetched only after the size check so an empty result fails on the assertion above.
    Pair<Person, Employee> match = matches.get(0);
    assertEquals("Kate", match.first().getName().toString());
    assertEquals("Kate", match.second().getName().toString());
}
Also used : Employee(org.apache.crunch.test.Employee) MRPipeline(org.apache.crunch.impl.mr.MRPipeline) Person(org.apache.crunch.test.Person) Pipeline(org.apache.crunch.Pipeline) MRPipeline(org.apache.crunch.impl.mr.MRPipeline) Pair(org.apache.crunch.Pair) Test(org.junit.Test)

Aggregations

Person (org.apache.crunch.test.Person)15 Test (org.junit.Test)11 DataFileWriter (org.apache.avro.file.DataFileWriter)3 SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter)3 MRPipeline (org.apache.crunch.impl.mr.MRPipeline)3 FileOutputStream (java.io.FileOutputStream)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 Pipeline (org.apache.crunch.Pipeline)2 Employee (org.apache.crunch.test.Employee)2 Path (org.apache.hadoop.fs.Path)2 Record (org.apache.avro.generic.GenericData.Record)1 MapFn (org.apache.crunch.MapFn)1 Pair (org.apache.crunch.Pair)1 PojoPerson (org.apache.crunch.io.avro.AvroFileReaderFactoryTest.PojoPerson)1 Builder (org.apache.crunch.test.Person.Builder)1 Configuration (org.apache.hadoop.conf.Configuration)1 Before (org.junit.Before)1