Example use of org.apache.avro.generic.GenericDatumWriter in the Apache project incubator-gobblin, taken from the testCheckSchema method of the EmbeddedGobblinDistcpTest class.
@Test
public void testCheckSchema() throws Exception {
  // Load the writer schema from test resources. Failures propagate (the method
  // already declares `throws Exception`) instead of being swallowed: the original
  // caught IOException and continued with a null schema, which would surface
  // later as a confusing NullPointerException in GenericDatumWriter.
  Schema schema;
  try (InputStream is = GobblinMetricsPinotFlattenerConverter.class.getClassLoader()
      .getResourceAsStream("avroSchemaManagerTest/expectedSchema.avsc")) {
    schema = new Schema.Parser().parse(is);
  }

  // Source/target directories for the distcp job; cleaned up on JVM exit.
  String fileName = "file.avro";
  File tmpSource = Files.createTempDir();
  tmpSource.deleteOnExit();
  File tmpTarget = Files.createTempDir();
  tmpTarget.deleteOnExit();
  File tmpFile = new File(tmpSource, fileName);
  tmpFile.createNewFile();

  // Write 100 records; try-with-resources closes the writer so the Avro
  // container file is actually flushed to disk (the original never closed it,
  // leaving an incomplete file and a leaked handle).
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.create(schema, tmpFile);
    for (int i = 0; i < 100; i++) {
      GenericRecord record = new GenericData.Record(schema);
      record.put("foo", i);
      dataFileWriter.append(record);
    }
  }

  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertFalse(new File(tmpTarget, fileName).exists());

  EmbeddedGobblinDistcp embedded =
      new EmbeddedGobblinDistcp(new Path(tmpSource.getAbsolutePath()), new Path(tmpTarget.getAbsolutePath()));
  embedded.setConfiguration(CopySource.SCHEMA_CHECK_ENABLED, "true");
  embedded.setLaunchTimeout(30, TimeUnit.SECONDS);
  embedded.setConfiguration(ConfigurationKeys.SOURCE_CLASS_KEY, SchemaCheckedCopySource.class.getName());
  embedded.setConfiguration(ConfigurationKeys.AVRO_SCHEMA_CHECK_STRATEGY,
      "org.apache.gobblin.util.schema_check.AvroSchemaCheckDefaultStrategy");

  // Case 1: field name mismatch ("foo1" vs "foo") -> job must abort, nothing copied.
  embedded.setConfiguration(ConfigurationKeys.COPY_EXPECTED_SCHEMA,
      "{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo1\",\"type\":[\"null\",\"int\"],\"doc\":\"this is for test\",\"default\":null}]}");
  JobExecutionResult result = embedded.run();
  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertFalse(result.isSuccessful());
  Assert.assertFalse(new File(tmpTarget, fileName).exists());

  // Case 2: field type mismatch (["string","int"] vs ["null","int"]) -> job must abort.
  embedded.setConfiguration(ConfigurationKeys.COPY_EXPECTED_SCHEMA,
      "{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"string\",\"int\"],\"doc\":\"this is for test\",\"default\":null}]}");
  result = embedded.run();
  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertFalse(result.isSuccessful());
  Assert.assertFalse(new File(tmpTarget, fileName).exists());

  // Case 3: expected schema matches -> job succeeds and the file is copied.
  embedded.setConfiguration(ConfigurationKeys.COPY_EXPECTED_SCHEMA,
      "{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"doc\":\"this is for test\",\"default\":null}]}");
  result = embedded.run();
  Assert.assertTrue(result.isSuccessful());
  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertTrue(new File(tmpTarget, fileName).exists());
}
Example use of org.apache.avro.generic.GenericDatumWriter in the Apache project incubator-gobblin, taken from the serialize method of the LiAvroSerializerBase class.
/**
 * Serializes an Avro {@link GenericRecord} into the Li wire format:
 * MAGIC_BYTE | schemaId-bytes | avro_payload. The record's schema is first
 * registered with the schema registry to obtain its MD5 id.
 *
 * @param topic topic under which the record's schema is registered
 * @param data  record to serialize
 * @return the encoded bytes, header included
 * @throws SerializationException wrapping any registry or I/O failure
 */
public byte[] serialize(String topic, GenericRecord data) throws SerializationException {
  Schema recordSchema = data.getSchema();
  try {
    MD5Digest registeredId = schemaRegistry.register(topic, recordSchema);
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    // Header first: magic byte, then the schema id bytes.
    buffer.write(LiAvroSerDeHelper.MAGIC_BYTE);
    buffer.write(registeredId.asBytes());
    // Binary-encode the Avro payload directly into the same buffer.
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(recordSchema);
    BinaryEncoder binaryEncoder = encoderFactory.directBinaryEncoder(buffer, null);
    datumWriter.write(data, binaryEncoder);
    binaryEncoder.flush();
    byte[] serialized = buffer.toByteArray();
    buffer.close();
    return serialized;
  } catch (IOException | SchemaRegistryException e) {
    // Preserve the original cause for callers diagnosing failures.
    throw new SerializationException(e);
  }
}
Example use of org.apache.avro.generic.GenericDatumWriter in the Cloudera project crunch, taken from the populateGenericFile method of the AvroFileReaderFactoryTest class.
/**
 * Writes the given records to {@code this.avroFile} as an Avro data file
 * using {@code outputSchema} as the writer schema.
 *
 * <p>try-with-resources guarantees both the stream and the writer are closed
 * (and the container file flushed) even when {@code create} or {@code append}
 * throws — the original leaked both resources on any exception.
 *
 * @param genericRecords records to append, in order
 * @param outputSchema   writer schema for the data file
 * @throws IOException on any write failure
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema outputSchema) throws IOException {
  try (FileOutputStream outputStream = new FileOutputStream(this.avroFile);
      DataFileWriter<GenericRecord> dataFileWriter =
          new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(outputSchema))) {
    dataFileWriter.create(outputSchema, outputStream);
    for (GenericRecord record : genericRecords) {
      dataFileWriter.append(record);
    }
  }
}
Example use of org.apache.avro.generic.GenericDatumWriter in the druid-io project druid, taken from the testParse method of the AvroStreamInputRowParserTest class.
@Test
public void testParse() throws SchemaValidationException, IOException {
  // Serde round-trip: serialize the parser to JSON and back, then verify the
  // deserialized parser can decode a schema-repo-framed Avro payload.
  Repository repository = new InMemoryRepository(null);
  AvroStreamInputRowParser originalParser = new AvroStreamInputRowParser(PARSE_SPEC,
      new SchemaRepoBasedAvroBytesDecoder<String, Integer>(new Avro1124SubjectAndIdConverter(TOPIC), repository));
  ByteBufferInputRowParser deserializedParser =
      jsonMapper.readValue(jsonMapper.writeValueAsString(originalParser), ByteBufferInputRowParser.class);
  // From here on, use the repository held by the deserialized parser so the
  // registered schema id is visible to the decoder under test.
  repository = ((SchemaRepoBasedAvroBytesDecoder) ((AvroStreamInputRowParser) deserializedParser)
      .getAvroBytesDecoder()).getSchemaRepository();

  // Prepare a sample datum and register its schema to obtain an id.
  GenericRecord someAvroDatum = buildSomeAvroDatum();
  Avro1124SubjectAndIdConverter converter = new Avro1124SubjectAndIdConverter(TOPIC);
  TypedSchemaRepository<Integer, Schema, String> repositoryClient =
      new TypedSchemaRepository<Integer, Schema, String>(
          repository, new IntegerConverter(), new AvroSchemaConverter(), new IdentityConverter());
  Integer schemaId = repositoryClient.registerSchema(TOPIC, SomeAvroDatum.getClassSchema());

  // Frame: 4-byte subject/id header followed by the binary-encoded Avro datum.
  ByteBuffer headerBuffer = ByteBuffer.allocate(4);
  converter.putSubjectAndId(TOPIC, schemaId, headerBuffer);
  ByteArrayOutputStream payload = new ByteArrayOutputStream();
  payload.write(headerBuffer.array());
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(someAvroDatum.getSchema());
  datumWriter.write(someAvroDatum, EncoderFactory.get().directBinaryEncoder(payload, null));

  // Decode through the round-tripped parser and verify the row contents.
  InputRow inputRow = deserializedParser.parse(ByteBuffer.wrap(payload.toByteArray()));
  assertInputRowCorrect(inputRow);
}
Example use of org.apache.avro.generic.GenericDatumWriter in the druid-io project druid, taken from the testParse method of the InlineSchemasAvroBytesDecoderTest class.
@Test
public void testParse() throws Exception {
  // Decode a payload framed for the inline-schemas decoder and check the
  // round-tripped record matches the original.
  GenericRecord expected = AvroStreamInputRowParserTest.buildSomeAvroDatum();
  Schema writerSchema = SomeAvroDatum.getClassSchema();

  ByteArrayOutputStream payload = new ByteArrayOutputStream();
  // Frame header: version byte (1) followed by the 4-byte big-endian schema id (10).
  payload.write(new byte[] { 1 });
  payload.write(ByteBuffer.allocate(4).putInt(10).array());
  // Binary-encode the Avro datum after the header.
  new GenericDatumWriter<GenericRecord>(writerSchema)
      .write(expected, EncoderFactory.get().directBinaryEncoder(payload, null));

  GenericRecord actual = new InlineSchemasAvroBytesDecoder(ImmutableMap.of(10, writerSchema))
      .parse(ByteBuffer.wrap(payload.toByteArray()));
  Assert.assertEquals(expected.get("id"), actual.get("id"));
}
Aggregations