Use of org.apache.avro.file.DataFileWriter in project hive by apache.
The class TestThatEvolvedSchemasActAsWeWant, method resolvedSchemasShouldReturnReaderSchema.
@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
  // Need to verify that when reading a datum with an updated reader schema,
  // the datum then returns the reader schema as its own, since we depend on
  // this behavior in order to avoid re-encoding the datum in the serde.
  String v0 = "{\n" +
      "  \"namespace\": \"org.apache.hadoop.hive\",\n" +
      "  \"name\": \"SomeStuff\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"v0\",\n" +
      "      \"type\":\"string\"\n" +
      "    }\n" +
      "  ]\n" +
      "}";
  String v1 = "{\n" +
      "  \"namespace\": \"org.apache.hadoop.hive\",\n" +
      "  \"name\": \"SomeStuff\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"v0\",\n" +
      "      \"type\":\"string\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"v1\",\n" +
      "      \"type\":\"string\",\n" +
      "      \"default\":\"v1_default\"\n" +
      "    }\n" +
      "  ]\n" +
      "}";
  Schema[] schemas = { AvroSerdeUtils.getSchemaFor(v0), AvroSerdeUtils.getSchemaFor(v1) };
  // Encode a record with the v0 writer schema.
  GenericRecord record = new GenericData.Record(schemas[0]);
  record.put("v0", "v0 value");
  assertTrue(GenericData.get().validate(schemas[0], record));
  // Write the datum out to a byte stream.
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  dfw.create(schemas[0], baos);
  dfw.append(record);
  dfw.close();
  // Read it back with v1 set as the expected (reader) schema.
  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
  gdr.setExpected(schemas[1]);
  DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
  assertTrue(dfs.hasNext());
  GenericRecord next = dfs.next();
  assertEquals("v0 value", next.get("v0").toString());
  assertEquals("v1_default", next.get("v1").toString());
  // Now the most important check - when we query this record for its schema,
  // we should get back the latest, reader schema:
  assertEquals(schemas[1], next.getSchema());
}
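For reference, the same writer-to-reader resolution can be expressed by handing both schemas to the GenericDatumReader constructor instead of calling setExpected. This is a minimal sketch that could sit at the end of the test, reusing the schemas and baos variables above; it is not part of the Hive test itself.

// Sketch only: resolve v0-encoded data against the v1 reader schema via
// the two-schema GenericDatumReader constructor.
GenericDatumReader<GenericRecord> resolvingReader =
    new GenericDatumReader<GenericRecord>(schemas[0], schemas[1]);
DataFileStream<GenericRecord> stream = new DataFileStream<GenericRecord>(
    new ByteArrayInputStream(baos.toByteArray()), resolvingReader);
GenericRecord resolved = stream.next();
// As in the test, the decoded record reports the reader schema as its own.
assertEquals(schemas[1], resolved.getSchema());
stream.close();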
Use of org.apache.avro.file.DataFileWriter in project crunch by cloudera.
The class MultiAvroSchemaJoinTest, method setUp.
@Before
public void setUp() throws Exception {
  this.personFile = File.createTempFile("person", ".avro");
  this.employeeFile = File.createTempFile("employee", ".avro");
  // Write three Person records to the person file.
  DatumWriter<Person> pdw = new SpecificDatumWriter<Person>();
  DataFileWriter<Person> pfw = new DataFileWriter<Person>(pdw);
  pfw.create(Person.SCHEMA$, personFile);
  Person p1 = new Person();
  p1.setName("Josh");
  p1.setAge(19);
  p1.setSiblingnames(ImmutableList.<CharSequence>of("Kate", "Mike"));
  pfw.append(p1);
  Person p2 = new Person();
  p2.setName("Kate");
  p2.setAge(17);
  p2.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Mike"));
  pfw.append(p2);
  Person p3 = new Person();
  p3.setName("Mike");
  p3.setAge(12);
  p3.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Kate"));
  pfw.append(p3);
  pfw.close();
  // Write a single Employee record to the employee file.
  DatumWriter<Employee> edw = new SpecificDatumWriter<Employee>();
  DataFileWriter<Employee> efw = new DataFileWriter<Employee>(edw);
  efw.create(Employee.SCHEMA$, employeeFile);
  Employee e1 = new Employee();
  e1.setName("Kate");
  e1.setSalary(100000);
  e1.setDepartment("Marketing");
  efw.append(e1);
  efw.close();
}
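Reading these files back outside of a Crunch pipeline pairs SpecificDatumReader with DataFileReader. A minimal read-back sketch, assuming the generated Person class used above; this is not part of the test:

// Sketch only: iterate the Person records written in setUp().
DatumReader<Person> pdr = new SpecificDatumReader<Person>(Person.class);
DataFileReader<Person> pfr = new DataFileReader<Person>(personFile, pdr);
for (Person p : pfr) {
  // Prints Josh, Kate and Mike with their ages.
  System.out.println(p.getName() + ": " + p.getAge());
}
pfr.close();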
Use of org.apache.avro.file.DataFileWriter in project crunch by cloudera.
The class AvroTypeSortTest, method writeAvroFile.
private void writeAvroFile(List<Person> people, File avroFile) throws IOException {
  FileOutputStream outputStream = new FileOutputStream(avroFile);
  SpecificDatumWriter<Person> writer = new SpecificDatumWriter<Person>(Person.class);
  DataFileWriter<Person> dataFileWriter = new DataFileWriter<Person>(writer);
  dataFileWriter.create(Person.SCHEMA$, outputStream);
  for (Person person : people) {
    dataFileWriter.append(person);
  }
  dataFileWriter.close();
  outputStream.close();
}
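A hypothetical call site for this helper; the record values and temp file below are illustrative, not taken from the test:

// Sketch only: build one Person and hand the list to writeAvroFile.
List<Person> people = new ArrayList<Person>();
Person person = new Person();
person.setName("Alice"); // illustrative value
person.setAge(30);
person.setSiblingnames(ImmutableList.<CharSequence>of());
people.add(person);
File avroFile = File.createTempFile("people", ".avro");
writeAvroFile(people, avroFile);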
Use of org.apache.avro.file.DataFileWriter in project crunch by cloudera.
The class AvroFileReaderFactoryTest, method populateGenericFile.
private void populateGenericFile(List<GenericRecord> genericRecords, Schema outputSchema) throws IOException {
  FileOutputStream outputStream = new FileOutputStream(this.avroFile);
  GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(outputSchema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
  dataFileWriter.create(outputSchema, outputStream);
  for (GenericRecord record : genericRecords) {
    dataFileWriter.append(record);
  }
  dataFileWriter.close();
  outputStream.close();
}
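A hypothetical caller might build the records with GenericRecordBuilder; the schema below is illustrative, not the one used by AvroFileReaderFactoryTest:

// Sketch only: construct two GenericRecords against an ad-hoc schema.
Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"StringWrapper\"," +
    "\"fields\":[{\"name\":\"value\",\"type\":\"string\"}]}");
GenericRecord r1 = new GenericRecordBuilder(schema).set("value", "hello").build();
GenericRecord r2 = new GenericRecordBuilder(schema).set("value", "world").build();
populateGenericFile(ImmutableList.of(r1, r2), schema);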
Use of org.apache.avro.file.DataFileWriter in project Gaffer by gchq.
The class AvroSerialiser, method serialise.
public byte[] serialise(final Object object) throws SerialisationException {
  Schema schema = ReflectData.get().getSchema(object.getClass());
  DatumWriter<Object> datumWriter = new ReflectDatumWriter<>(schema);
  DataFileWriter<Object> dataFileWriter = new DataFileWriter<>(datumWriter);
  ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
  try {
    dataFileWriter.create(schema, byteOut);
    dataFileWriter.append(object);
    dataFileWriter.flush();
  } catch (final IOException e) {
    throw new SerialisationException("Unable to serialise given object of class: " + object.getClass().getName(), e);
  } finally {
    close(dataFileWriter);
  }
  return byteOut.toByteArray();
}
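A matching read path would pair ReflectDatumReader with DataFileStream. The following is a minimal sketch of an assumed counterpart, not Gaffer's actual deserialise implementation:

public <T> T deserialise(final byte[] bytes, final Class<T> clazz) throws IOException {
  // Sketch only (assumed counterpart to serialise, not Gaffer's code):
  // read the single reflect-encoded datum back out of the container bytes.
  DatumReader<T> datumReader = new ReflectDatumReader<>(clazz);
  DataFileStream<T> dataFileStream = new DataFileStream<>(new ByteArrayInputStream(bytes), datumReader);
  try {
    return dataFileStream.next();
  } finally {
    dataFileStream.close();
  }
}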