Use of org.apache.avro.file.DataFileWriter in project hive by apache.
In class TestThatEvolvedSchemasActAsWeWant, method resolvedSchemasShouldReturnReaderSchema:
@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
  // Need to verify that when reading a datum with an updated reader schema
  // that the datum then returns the reader schema as its own, since we
  // depend on this behavior in order to avoid re-encoding the datum
  // in the serde.
  String v0 = "{\n" +
      " \"namespace\": \"org.apache.hadoop.hive\",\n" +
      " \"name\": \"SomeStuff\",\n" +
      " \"type\": \"record\",\n" +
      " \"fields\": [\n" +
      " {\n" +
      " \"name\":\"v0\",\n" +
      " \"type\":\"string\"\n" +
      " }\n" +
      " ]\n" +
      "}";
  String v1 = "{\n" +
      " \"namespace\": \"org.apache.hadoop.hive\",\n" +
      " \"name\": \"SomeStuff\",\n" +
      " \"type\": \"record\",\n" +
      " \"fields\": [\n" +
      " {\n" +
      " \"name\":\"v0\",\n" +
      " \"type\":\"string\"\n" +
      " },\n" +
      " {\n" +
      " \"name\":\"v1\",\n" +
      " \"type\":\"string\",\n" +
      " \"default\":\"v1_default\"\n" +
      " }\n" +
      " ]\n" +
      "}";
  Schema[] schemas = { AvroSerdeUtils.getSchemaFor(v0), AvroSerdeUtils.getSchemaFor(v1) };

  // Encode a record with v0 and check it validates against that schema.
  GenericRecord record = new GenericData.Record(schemas[0]);
  record.put("v0", "v0 value");
  assertTrue(GenericData.get().validate(schemas[0], record));

  // Write the datum out to a stream.
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  dfw.create(schemas[0], baos);
  dfw.append(record);
  dfw.close();

  // Read it back with the evolved v1 schema set as the expected (reader) schema.
  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
  gdr.setExpected(schemas[1]);
  DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
  assertTrue(dfs.hasNext());
  GenericRecord next = dfs.next();
  assertEquals("v0 value", next.get("v0").toString());
  assertEquals("v1_default", next.get("v1").toString());

  // Now the most important check - when we query this record for its schema,
  // we should get back the latest, reader schema:
  assertEquals(schemas[1], next.getSchema());
}
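The same resolution can be expressed without the mutable setExpected call: pass the reader schema to the GenericDatumReader constructor and let DataFileStream supply the writer schema from the file header. A minimal sketch, reusing the schemas array and baos stream from the test above:

// null writer schema: DataFileStream fills it in from the file header.
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null, schemas[1]);
try (DataFileStream<GenericRecord> stream =
    new DataFileStream<>(new ByteArrayInputStream(baos.toByteArray()), reader)) {
  GenericRecord resolved = stream.next();
  // Same invariant: the resolved record reports the reader schema.
  assertEquals(schemas[1], resolved.getSchema());
}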
Use of org.apache.avro.file.DataFileWriter in project haivvreo by jghoman.
In class TestThatEvolvedSchemasActAsWeWant, method resolvedSchemasShouldReturnReaderSchema:
@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
  // Need to verify that when reading a datum with an updated reader schema
  // that the datum then returns the reader schema as its own, since we
  // depend on this behavior in order to avoid re-encoding the datum
  // in the serde.
  String v0 = "{\n" +
      " \"namespace\": \"com.linkedin.haivvreo\",\n" +
      " \"name\": \"SomeStuff\",\n" +
      " \"type\": \"record\",\n" +
      " \"fields\": [\n" +
      " {\n" +
      " \"name\":\"v0\",\n" +
      " \"type\":\"string\"\n" +
      " }\n" +
      " ]\n" +
      "}";
  String v1 = "{\n" +
      " \"namespace\": \"com.linkedin.haivvreo\",\n" +
      " \"name\": \"SomeStuff\",\n" +
      " \"type\": \"record\",\n" +
      " \"fields\": [\n" +
      " {\n" +
      " \"name\":\"v0\",\n" +
      " \"type\":\"string\"\n" +
      " },\n" +
      " {\n" +
      " \"name\":\"v1\",\n" +
      " \"type\":\"string\",\n" +
      " \"default\":\"v1_default\"\n" +
      " }\n" +
      " ]\n" +
      "}";
  Schema[] schemas = { Schema.parse(v0), Schema.parse(v1) };

  // Encode a record with v0 and check it validates against that schema.
  GenericRecord record = new GenericData.Record(schemas[0]);
  record.put("v0", "v0 value");
  assertTrue(GenericData.get().validate(schemas[0], record));

  // Write the datum out to a stream.
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  dfw.create(schemas[0], baos);
  dfw.append(record);
  dfw.close();

  // Read it back with the evolved v1 schema set as the expected (reader) schema.
  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
  gdr.setExpected(schemas[1]);
  DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
  assertTrue(dfs.hasNext());
  GenericRecord next = dfs.next();
  assertEquals("v0 value", next.get("v0").toString());
  assertEquals("v1_default", next.get("v1").toString());

  // Now the most important check - when we query this record for its schema,
  // we should get back the latest, reader schema:
  assertEquals(schemas[1], next.getSchema());
}
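Apart from the namespace, the only difference from the hive version above is the static Schema.parse call, which later Avro releases deprecate in favor of Schema.Parser. A sketch of the replacement; note it uses a fresh Parser per schema, because a single Parser instance refuses to redefine the already-seen record name SomeStuff:

// One Parser per parse: a shared Parser would throw "Can't redefine: ...SomeStuff".
Schema[] schemas = { new Schema.Parser().parse(v0), new Schema.Parser().parse(v1) };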
Use of org.apache.avro.file.DataFileWriter in project hazelcast by hazelcast.
In class AvroProcessors, method dataFileWriterFn:
@SuppressFBWarnings(value = "RV_RETURN_VALUE_IGNORED_BAD_PRACTICE",
    justification = "mkdirs() returns false if the directory already existed, which is good. "
        + "We don't care even if it didn't exist and we failed to create it, "
        + "because we'll fail later when trying to create the file.")
private static <D> FunctionEx<Processor.Context, DataFileWriter<D>> dataFileWriterFn(
    String directoryName, String jsonSchema, SupplierEx<DatumWriter<D>> datumWriterSupplier) {
  return new FunctionEx<Processor.Context, DataFileWriter<D>>() {
    @Override
    public DataFileWriter<D> applyEx(Processor.Context context) throws Exception {
      Schema.Parser parser = new Schema.Parser();
      Schema schema = parser.parse(jsonSchema);
      Path directory = Paths.get(directoryName);
      directory.toFile().mkdirs();
      // Each parallel processor writes its own file, named after its global index.
      Path file = directory.resolve(String.valueOf(context.globalProcessorIndex()));
      DataFileWriter<D> writer = new DataFileWriter<>(datumWriterSupplier.get());
      writer.create(schema, file.toFile());
      return writer;
    }

    @Override
    public List<Permission> permissions() {
      return singletonList(ConnectorPermission.file(directoryName, ACTION_WRITE));
    }
  };
}
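The writer returned here produces uncompressed output. DataFileWriter also accepts a codec, which has to be configured before create(). A hedged sketch of the same applyEx body with compression added; the deflate level 6 is an arbitrary choice for illustration, not something the Hazelcast source does:

DataFileWriter<D> writer = new DataFileWriter<>(datumWriterSupplier.get());
// Codec must be set before create(); level 6 is an arbitrary choice.
writer.setCodec(CodecFactory.deflateCodec(6));
writer.create(schema, file.toFile());
return writer;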
Use of org.apache.avro.file.DataFileWriter in project hazelcast by hazelcast.
In class AvroFileFormatTest, method createAvroFile:
private static void createAvroFile(String filename, SpecificUser... users) throws IOException {
  // Start from a clean output directory.
  File target = new File("target/avro");
  FileUtils.deleteDirectory(target);
  target.mkdirs();
  // Write each user as a record of the generated SpecificUser schema.
  DataFileWriter<SpecificUser> fileWriter =
      new DataFileWriter<>(new SpecificDatumWriter<>(SpecificUser.class));
  fileWriter.create(SpecificUser.SCHEMA$, new File("target/avro/" + filename));
  for (SpecificUser user : users) {
    fileWriter.append(user);
  }
  fileWriter.close();
}
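For symmetry, reading such a file back goes through DataFileReader with a SpecificDatumReader. A minimal sketch, assuming the same generated SpecificUser class and the filename written above:

try (DataFileReader<SpecificUser> fileReader = new DataFileReader<>(
    new File("target/avro/" + filename), new SpecificDatumReader<>(SpecificUser.class))) {
  while (fileReader.hasNext()) {
    SpecificUser user = fileReader.next();
    // process user...
  }
}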
Use of org.apache.avro.file.DataFileWriter in project hazelcast by hazelcast.
In class FileUtil, method createAvroFile:
static File createAvroFile() {
  try {
    File file = Files.createTempDirectory("sql-avro-test").toFile();
    file.deleteOnExit();
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>())) {
      writer.create(AVRO_RECORD.getSchema(), new File(file.getAbsolutePath(), "file.avro"));
      writer.append(AVRO_RECORD);
    }
    return file;
  } catch (IOException ioe) {
    throw sneakyThrow(ioe);
  }
}
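DataFileWriter can also reopen an existing container file and add further records via appendTo, which recovers the schema and codec from the file header. A minimal sketch, assuming the AVRO_RECORD constant and the directory returned by createAvroFile() above:

File avroFile = new File(createAvroFile(), "file.avro");
// Reopen the container in append mode and add one more record.
try (DataFileWriter<GenericRecord> writer =
    new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(AVRO_RECORD.getSchema()))
        .appendTo(avroFile)) {
  writer.append(AVRO_RECORD);
}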