Use of org.apache.avro.file.DataFileWriter in the project incubator-gobblin by Apache.
From the class AvroTestTools, method writeAsAvroBinary.
/**
 * Writes every record produced by {@code input} into a single Avro container file.
 *
 * @param input records to append, consumed until the iterator is exhausted
 * @param schema schema stored in the file header
 * @param fs file system used to create the output stream
 * @param outputPath destination path; an existing file at this path is overwritten
 * @throws IOException if the file cannot be created or a record cannot be written
 */
private void writeAsAvroBinary(Iterator<GenericRecord> input, Schema schema, FileSystem fs, Path outputPath) throws IOException {
  // try-with-resources guarantees the writer is closed (and its buffered block flushed)
  // even when create() or append() throws part-way through.
  try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>())) {
    writer.create(schema, fs.create(outputPath, true));
    while (input.hasNext()) {
      writer.append(input.next());
    }
  }
  // Only reached when the writer was closed without an exception.
  log.info("Successfully wrote avro file to path " + outputPath);
}
Use of org.apache.avro.file.DataFileWriter in the project incubator-gobblin by Apache.
From the class CompactionGMCEPublishingActionTest, method createAvroFileWithRepeatingRecords.
/**
 * Creates an Avro container file that holds the same record {@code count} times.
 *
 * @param file destination file, created or overwritten
 * @param r record appended on every iteration
 * @param count number of copies of {@code r} to write; nothing is appended when it is zero or negative
 * @param schema schema for the file header; when absent, {@code getSchema()} supplies it
 * @throws IOException if the file cannot be opened or written
 */
public void createAvroFileWithRepeatingRecords(File file, GenericRecord r, int count, Optional<Schema> schema) throws IOException {
  Schema writerSchema = schema.isPresent() ? schema.get() : getSchema();
  // Both the raw stream and the writer are managed resources, so neither leaks
  // if create() or append() fails. The writer closes first and flushes into the stream.
  try (FileOutputStream out = new FileOutputStream(file);
      DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>())) {
    writer.create(writerSchema, out);
    for (int i = 0; i < count; ++i) {
      writer.append(r);
    }
  }
}
Use of org.apache.avro.file.DataFileWriter in the project incubator-gobblin by Apache.
From the class AvroCompactionTaskTest, method createAvroFileWithRepeatingRecords.
/**
 * Writes {@code count} copies of one record into a new Avro container file.
 *
 * @param file destination file, created or overwritten
 * @param r record appended on every iteration
 * @param count number of copies of {@code r} to write; nothing is appended when it is zero or negative
 * @param schema schema for the file header; when absent, {@code getSchema()} supplies it
 * @throws IOException if the file cannot be opened or written
 */
private void createAvroFileWithRepeatingRecords(File file, GenericRecord r, int count, Optional<Schema> schema) throws IOException {
  Schema writerSchema = schema.isPresent() ? schema.get() : getSchema();
  // Managing the stream and the writer together ensures the file handle is released
  // even when writing fails; the writer is closed before the stream.
  try (FileOutputStream out = new FileOutputStream(file);
      DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>())) {
    writer.create(writerSchema, out);
    for (int i = 0; i < count; ++i) {
      writer.append(r);
    }
  }
}
Use of org.apache.avro.file.DataFileWriter in the project incubator-gobblin by Apache.
From the class AvroRecursionEliminatingConverterTest, method generateRecord.
/**
 * Builds one record for the schema in {@code /converter/recursive.avsc} and writes it to a temporary Avro file.
 *
 * <p>The record carries an {@code address} sub-record whose {@code previous_address} field holds
 * another record of the same address schema, giving one level of nesting.
 *
 * @return the temporary file containing the single record; it is registered for deletion on JVM exit
 * @throws IOException if the temporary file cannot be created or written
 */
public File generateRecord() throws IOException {
  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/recursive.avsc"));
  Schema addressSchema = inputSchema.getField("address").schema();

  // Innermost address, referenced from the outer address via "previous_address".
  GenericRecord innerAddressRecord = new GenericData.Record(addressSchema);
  innerAddressRecord.put("city", "San Francisco");
  innerAddressRecord.put("street_number", 3456);

  GenericRecord addressRecord = new GenericData.Record(addressSchema);
  addressRecord.put("city", "Los Angeles");
  addressRecord.put("street_number", 1234);
  addressRecord.put("previous_address", innerAddressRecord);

  GenericRecord record = new GenericData.Record(inputSchema);
  record.put("name", "John");
  record.put("date_of_birth", 1234L);
  record.put("last_modified", 4567L);
  record.put("created", 6789L);
  record.put("address", addressRecord);

  File recordFile = File.createTempFile(this.getClass().getSimpleName(), "avsc");
  // Register cleanup before writing so a failed write does not leave the temp file behind.
  recordFile.deleteOnExit();

  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(inputSchema);
  // try-with-resources closes the writer even if create() or append() throws.
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter)) {
    dataFileWriter.create(inputSchema, recordFile);
    dataFileWriter.append(record);
  }
  return recordFile;
}
Use of org.apache.avro.file.DataFileWriter in the project incubator-gobblin by Apache.
From the class EmbeddedGobblinDistcpTest, method testCheckSchema.
/**
 * Runs an embedded distcp job with schema checking enabled against one Avro file and verifies
 * that the file is copied only when the configured expected schema matches the file's schema.
 *
 * <p>Three runs are made with different {@code COPY_EXPECTED_SCHEMA} values: a different field
 * name, a different field type union, and finally the matching schema.
 */
@Test
public void testCheckSchema() throws Exception {
  // Let a schema-loading failure fail the test directly instead of printing the stack trace
  // and continuing with a null schema.
  Schema schema;
  try (InputStream is = GobblinMetricsPinotFlattenerConverter.class.getClassLoader().getResourceAsStream("avroSchemaManagerTest/expectedSchema.avsc")) {
    schema = new Schema.Parser().parse(is);
  }
  String fileName = "file.avro";
  File tmpSource = Files.createTempDir();
  tmpSource.deleteOnExit();
  File tmpTarget = Files.createTempDir();
  tmpTarget.deleteOnExit();
  File tmpFile = new File(tmpSource, fileName);
  tmpFile.createNewFile();
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  // Close the writer before the copy job runs so the buffered records are flushed to disk
  // and the file handle is released.
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.create(schema, tmpFile);
    for (int i = 0; i < 100; i++) {
      GenericRecord record = new GenericData.Record(schema);
      record.put("foo", i);
      dataFileWriter.append(record);
    }
  }
  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertFalse(new File(tmpTarget, fileName).exists());
  EmbeddedGobblinDistcp embedded = new EmbeddedGobblinDistcp(new Path(tmpSource.getAbsolutePath()), new Path(tmpTarget.getAbsolutePath()));
  embedded.setConfiguration(CopySource.SCHEMA_CHECK_ENABLED, "true");
  embedded.setLaunchTimeout(30, TimeUnit.SECONDS);
  embedded.setConfiguration(ConfigurationKeys.SOURCE_CLASS_KEY, SchemaCheckedCopySource.class.getName());
  embedded.setConfiguration(ConfigurationKeys.AVRO_SCHEMA_CHECK_STRATEGY, "org.apache.gobblin.util.schema_check.AvroSchemaCheckDefaultStrategy");
  // test when schema is not the expected one, the job will be aborted.
  // Case 1: the expected schema names the field "foo1" instead of "foo".
  embedded.setConfiguration(ConfigurationKeys.COPY_EXPECTED_SCHEMA, "{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo1\",\"type\":[\"null\",\"int\"],\"doc\":\"this is for test\",\"default\":null}]}");
  JobExecutionResult result = embedded.run();
  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertFalse(result.isSuccessful());
  Assert.assertFalse(new File(tmpTarget, fileName).exists());
  // Case 2: the field name matches but its type union is ["string","int"] instead of ["null","int"].
  embedded.setConfiguration(ConfigurationKeys.COPY_EXPECTED_SCHEMA, "{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"string\",\"int\"],\"doc\":\"this is for test\",\"default\":null}]}");
  result = embedded.run();
  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertFalse(result.isSuccessful());
  Assert.assertFalse(new File(tmpTarget, fileName).exists());
  // test when schema is the expected one, the job will succeed.
  embedded.setConfiguration(ConfigurationKeys.COPY_EXPECTED_SCHEMA, "{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"doc\":\"this is for test\",\"default\":null}]}");
  result = embedded.run();
  Assert.assertTrue(result.isSuccessful());
  Assert.assertTrue(new File(tmpSource, fileName).exists());
  Assert.assertTrue(new File(tmpTarget, fileName).exists());
}
Aggregations