use of org.apache.avro.file.DataFileWriter in project avro-kafka-storm by ransilberman.
the class MainTest method testDataFile.
@Test
public void testDataFile() throws IOException {
    File fileOut = new File("data.avro");
    File fileIn = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    // write the file
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
    dataFileWriter.create(schema, fileOut);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.close();
    // read the file
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(fileIn, reader);
    assertThat("Schema is the same", schema, is(dataFileReader.getSchema()));
    for (GenericRecord record : dataFileReader) {
        assertThat(record.get("siteId").toString(), is("28280110"));
        assertThat(record.get("eventType").toString(), is("PLine"));
    }
}
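Note that the test above never closes its DataFileReader. A minimal read-back sketch (not part of the original test; the data.avro file name is carried over from above) using try-with-resources:

// Sketch: read data.avro back with automatic resource cleanup.
DatumReader<GenericRecord> reader = new GenericDatumReader<>();
try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File("data.avro"), reader)) {
    for (GenericRecord record : dataFileReader) {
        System.out.println(record);  // each record is a GenericRecord using the writer's schema
    }
}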
use of org.apache.avro.file.DataFileWriter in project Gaffer by gchq.
the class AvroSerialiser method serialise.
@Override
public byte[] serialise(final Object object) throws SerialisationException {
    Schema schema = ReflectData.get().getSchema(object.getClass());
    DatumWriter<Object> datumWriter = new ReflectDatumWriter<>(schema);
    DataFileWriter<Object> dataFileWriter = new DataFileWriter<>(datumWriter);
    ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
    try {
        dataFileWriter.create(schema, byteOut);
        dataFileWriter.append(object);
        dataFileWriter.flush();
    } catch (final IOException e) {
        throw new SerialisationException("Unable to serialise given object of class: " + object.getClass().getName(), e);
    } finally {
        close(dataFileWriter);
    }
    return byteOut.toByteArray();
}
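For reference, a hedged sketch of the reverse direction (not Gaffer's actual deserialise implementation): reading reflect-serialised bytes back into an object with DataFileReader and SeekableByteArrayInput.

// Sketch only: assumes the bytes were produced by the serialise method above.
static <T> T deserialise(byte[] bytes, Class<T> clazz) throws IOException {
    DatumReader<T> datumReader = new ReflectDatumReader<>(clazz);
    try (DataFileReader<T> reader = new DataFileReader<>(new SeekableByteArrayInput(bytes), datumReader)) {
        return reader.next();  // single object per container file in this sketch
    }
}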
use of org.apache.avro.file.DataFileWriter in project samza by apache.
the class TestAvroFileHdfsReader method writeTestEventsToFile.
public static void writeTestEventsToFile(String path, int numEvents) throws Exception {
    Schema schema = Schema.parse(TestAvroFileHdfsReader.class.getResourceAsStream("/reader/TestEvent.avsc"));
    File file = new File(path);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(writer);
    dataFileWriter.create(schema, file);
    for (int i = 0; i < numEvents; i++) {
        GenericRecord datum = new GenericData.Record(schema);
        datum.put(FIELD_1, i);
        datum.put(FIELD_2, "string_" + i);
        dataFileWriter.append(datum);
    }
    dataFileWriter.close();
}
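Schema.parse(InputStream) is deprecated in recent Avro releases; a sketch of the equivalent load with Schema.Parser (same resource path as above, assumed to behave identically):

// Sketch: non-deprecated schema loading.
Schema schema = new Schema.Parser()
        .parse(TestAvroFileHdfsReader.class.getResourceAsStream("/reader/TestEvent.avsc"));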
use of org.apache.avro.file.DataFileWriter in project samza by apache.
the class AzureBlobAvroWriter method startNextBlob.
private void startNextBlob(Optional<IndexedRecord> optionalIndexedRecord) throws IOException {
    if (currentBlobWriterComponents != null) {
        LOG.info("Starting new blob as current blob size is " + currentBlobWriterComponents.azureBlobOutputStream.getSize() + " and max blob size is " + maxBlobSize + " or number of records is " + recordsInCurrentBlob + " and max records in blob is " + maxRecordsPerBlob);
        currentBlobWriterComponents.dataFileWriter.flush();
        currentBlobWriterComponents.azureBlobOutputStream.releaseBuffer();
        recordsInCurrentBlob = 0;
    }
    // optionalIndexedRecord is the first message in this case.
    if (datumWriter == null) {
        if (optionalIndexedRecord.isPresent()) {
            IndexedRecord record = optionalIndexedRecord.get();
            schema = record.getSchema();
            if (record instanceof SpecificRecord) {
                datumWriter = new SpecificDatumWriter<>(schema);
            } else {
                datumWriter = new GenericDatumWriter<>(schema);
            }
        } else {
            throw new IllegalStateException("Writing without schema setup.");
        }
    }
    String blobURL;
    if (useRandomStringInBlobName) {
        blobURL = String.format(BLOB_NAME_RANDOM_STRING_AVRO, blobURLPrefix, UTC_FORMATTER.format(System.currentTimeMillis()), UUID.randomUUID().toString().substring(0, 8), compression.getFileExtension());
    } else {
        blobURL = String.format(BLOB_NAME_AVRO, blobURLPrefix, UTC_FORMATTER.format(System.currentTimeMillis()), compression.getFileExtension());
    }
    LOG.info("Creating new blob: {}", blobURL);
    BlockBlobAsyncClient blockBlobAsyncClient = containerAsyncClient.getBlobAsyncClient(blobURL).getBlockBlobAsyncClient();
    DataFileWriter<IndexedRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    AzureBlobOutputStream azureBlobOutputStream;
    try {
        azureBlobOutputStream = new AzureBlobOutputStream(blockBlobAsyncClient, blobThreadPool, metrics, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, streamName, flushTimeoutMs, maxBlockFlushThresholdSize, compression);
    } catch (Exception e) {
        throw new SamzaException("Unable to create AzureBlobOutputStream", e);
    }
    dataFileWriter.create(schema, azureBlobOutputStream);
    dataFileWriter.setFlushOnEveryBlock(false);
    this.currentBlobWriterComponents = new BlobWriterComponents(dataFileWriter, azureBlobOutputStream, blockBlobAsyncClient);
    allBlobWriterComponents.add(this.currentBlobWriterComponents);
    LOG.info("Created new blob: {}", blobURL);
}
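The method above leans on two points of the DataFileWriter API: create(Schema, OutputStream) accepts any OutputStream, not just files, and setFlushOnEveryBlock(false) defers flushing to explicit flush() calls. A minimal standalone sketch of that pattern (not Samza code; the schema and record are illustrative):

// Sketch: DataFileWriter targeting an arbitrary OutputStream with deferred flushing.
Schema schema = SchemaBuilder.record("Event").fields().requiredString("id").endRecord();
GenericRecord event = new GenericData.Record(schema);
event.put("id", "e-1");
ByteArrayOutputStream out = new ByteArrayOutputStream();
try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
    writer.create(schema, out);
    writer.setFlushOnEveryBlock(false);
    writer.append(event);
    writer.flush();  // data reaches the OutputStream here, not on every block boundary
}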
use of org.apache.avro.file.DataFileWriter in project presto by prestodb.
the class KafkaAvroSmokeTest method convertRecordToAvro.
private static byte[] convertRecordToAvro(Schema schema, Map<String, Object> values) {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    GenericData.Record record = new GenericData.Record(schema);
    values.forEach(record::put);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        dataFileWriter.create(schema, outputStream);
        dataFileWriter.append(record);
        dataFileWriter.close();
    } catch (IOException e) {
        throw new UncheckedIOException("Failed to convert to Avro.", e);
    }
    return outputStream.toByteArray();
}
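A hedged sketch of the inverse step (not part of the Presto test): decoding the returned bytes back into GenericRecords with DataFileStream over a ByteArrayInputStream.

// Sketch: assumes avroBytes was produced by convertRecordToAvro above.
static List<GenericRecord> readAvroBytes(byte[] avroBytes) throws IOException {
    List<GenericRecord> records = new ArrayList<>();
    try (DataFileStream<GenericRecord> stream =
            new DataFileStream<>(new ByteArrayInputStream(avroBytes), new GenericDatumReader<>())) {
        stream.forEach(records::add);
    }
    return records;
}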