
Example 86 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project avro-kafka-storm by ransilberman.

the class MainTest method testDataFile.

@Test
public void testDataFile() throws IOException {
    File fileOut = new File("data.avro");
    File fileIn = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    // write the file
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
    dataFileWriter.create(schema, fileOut);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.close();
    // read the file
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(fileIn, reader);
    assertThat("Scema is the same", schema, is(dataFileReader.getSchema()));
    for (GenericRecord record : dataFileReader) {
        assertThat(record.get("siteId").toString(), is("28280110"));
        assertThat(record.get("eventType").toString(), is("PLine"));
    }
}
Also used: HashMap(java.util.HashMap) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)
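
A minimal read-back sketch for the file written above, assuming the same data.avro file and field names from the test. It adds the try-with-resources the test omits, so the DataFileReader is closed even if an assertion fails; the class name ReadLPEvents is hypothetical.

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class ReadLPEvents {
    public static void main(String[] args) throws IOException {
        // no reader schema supplied: the writer schema is read from the file header
        DatumReader<GenericRecord> reader = new GenericDatumReader<>();
        try (DataFileReader<GenericRecord> dataFileReader =
                new DataFileReader<>(new File("data.avro"), reader)) {
            for (GenericRecord record : dataFileReader) {
                // the union field is returned as a record of whichever branch was written
                GenericRecord pline = (GenericRecord) record.get("subrecord");
                System.out.println(record.get("sessionId") + " -> " + pline.get("text"));
            }
        }
    }
}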

Example 87 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project Gaffer by gchq.

the class AvroSerialiser method serialise.

@Override
public byte[] serialise(final Object object) throws SerialisationException {
    Schema schema = ReflectData.get().getSchema(object.getClass());
    DatumWriter<Object> datumWriter = new ReflectDatumWriter<>(schema);
    DataFileWriter<Object> dataFileWriter = new DataFileWriter<>(datumWriter);
    ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
    try {
        dataFileWriter.create(schema, byteOut);
        dataFileWriter.append(object);
        dataFileWriter.flush();
    } catch (final IOException e) {
        throw new SerialisationException("Unable to serialise given object of class: " + object.getClass().getName(), e);
    } finally {
        close(dataFileWriter);
    }
    return byteOut.toByteArray();
}
Also used: SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) ReflectDatumWriter(org.apache.avro.reflect.ReflectDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException)
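
The serialiser above shows only the write half. A minimal sketch of the inverse, assuming the bytes came from serialise() and therefore hold exactly one datum; the class and method names are illustrative, not Gaffer's actual deserialise API.

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.avro.file.DataFileStream;
import org.apache.avro.reflect.ReflectDatumReader;

public final class AvroDeserialiseSketch {
    public static <T> T deserialise(final byte[] bytes, final Class<T> clazz) throws IOException {
        // the writer schema travels in the container header; clazz supplies the reader schema
        ReflectDatumReader<T> datumReader = new ReflectDatumReader<>(clazz);
        try (DataFileStream<T> stream =
                new DataFileStream<>(new ByteArrayInputStream(bytes), datumReader)) {
            // serialise() appended a single datum, so the first record is the whole payload
            return stream.next();
        }
    }
}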

Example 88 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project samza by apache.

the class TestAvroFileHdfsReader method writeTestEventsToFile.

public static void writeTestEventsToFile(String path, int numEvents) throws Exception {
    Schema schema = Schema.parse(TestAvroFileHdfsReader.class.getResourceAsStream("/reader/TestEvent.avsc"));
    File file = new File(path);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(writer);
    dataFileWriter.create(schema, file);
    for (int i = 0; i < numEvents; i++) {
        GenericRecord datum = new GenericData.Record(schema);
        datum.put(FIELD_1, i);
        datum.put(FIELD_2, "string_" + i);
        dataFileWriter.append(datum);
    }
    dataFileWriter.close();
}
Also used: Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) File(java.io.File)
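
Schema.parse(InputStream) is deprecated in later Avro releases. A minimal sketch of the same write loop using Schema.Parser and try-with-resources, assuming the /reader/TestEvent.avsc resource; the literal field names stand in for the test's FIELD_1/FIELD_2 constants, whose values are not shown above.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class WriteTestEventsSketch {
    public static void writeTestEvents(String path, int numEvents) throws IOException {
        Schema schema = new Schema.Parser()
                .parse(WriteTestEventsSketch.class.getResourceAsStream("/reader/TestEvent.avsc"));
        // try-with-resources closes (and flushes) the writer even if an append fails
        try (DataFileWriter<GenericRecord> writer =
                new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            writer.create(schema, new File(path));
            for (int i = 0; i < numEvents; i++) {
                GenericRecord datum = new GenericData.Record(schema);
                datum.put("field1", i);             // assumed name for FIELD_1
                datum.put("field2", "string_" + i); // assumed name for FIELD_2
                writer.append(datum);
            }
        }
    }
}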

Example 89 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project samza by apache.

the class AzureBlobAvroWriter method startNextBlob.

private void startNextBlob(Optional<IndexedRecord> optionalIndexedRecord) throws IOException {
    if (currentBlobWriterComponents != null) {
        LOG.info("Starting new blob as current blob size is " + currentBlobWriterComponents.azureBlobOutputStream.getSize() + " and max blob size is " + maxBlobSize + " or number of records is " + recordsInCurrentBlob + " and max records in blob is " + maxRecordsPerBlob);
        currentBlobWriterComponents.dataFileWriter.flush();
        currentBlobWriterComponents.azureBlobOutputStream.releaseBuffer();
        recordsInCurrentBlob = 0;
    }
    // optionalIndexedRecord is the first message in this case.
    if (datumWriter == null) {
        if (optionalIndexedRecord.isPresent()) {
            IndexedRecord record = optionalIndexedRecord.get();
            schema = record.getSchema();
            if (record instanceof SpecificRecord) {
                datumWriter = new SpecificDatumWriter<>(schema);
            } else {
                datumWriter = new GenericDatumWriter<>(schema);
            }
        } else {
            throw new IllegalStateException("Writing without schema setup.");
        }
    }
    String blobURL;
    if (useRandomStringInBlobName) {
        blobURL = String.format(BLOB_NAME_RANDOM_STRING_AVRO, blobURLPrefix, UTC_FORMATTER.format(System.currentTimeMillis()), UUID.randomUUID().toString().substring(0, 8), compression.getFileExtension());
    } else {
        blobURL = String.format(BLOB_NAME_AVRO, blobURLPrefix, UTC_FORMATTER.format(System.currentTimeMillis()), compression.getFileExtension());
    }
    LOG.info("Creating new blob: {}", blobURL);
    BlockBlobAsyncClient blockBlobAsyncClient = containerAsyncClient.getBlobAsyncClient(blobURL).getBlockBlobAsyncClient();
    DataFileWriter<IndexedRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    AzureBlobOutputStream azureBlobOutputStream;
    try {
        azureBlobOutputStream = new AzureBlobOutputStream(blockBlobAsyncClient, blobThreadPool, metrics, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, streamName, flushTimeoutMs, maxBlockFlushThresholdSize, compression);
    } catch (Exception e) {
        throw new SamzaException("Unable to create AzureBlobOutputStream", e);
    }
    dataFileWriter.create(schema, azureBlobOutputStream);
    dataFileWriter.setFlushOnEveryBlock(false);
    this.currentBlobWriterComponents = new BlobWriterComponents(dataFileWriter, azureBlobOutputStream, blockBlobAsyncClient);
    allBlobWriterComponents.add(this.currentBlobWriterComponents);
    LOG.info("Created new blob: {}", blobURL);
}
Also used: IndexedRecord(org.apache.avro.generic.IndexedRecord) DataFileWriter(org.apache.avro.file.DataFileWriter) SamzaException(org.apache.samza.SamzaException) IOException(java.io.IOException) SpecificRecord(org.apache.avro.specific.SpecificRecord) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient)
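
Stripped of the Azure plumbing, startNextBlob reduces to one reusable pattern: choose a datum writer by record type, then open a DataFileWriter over an arbitrary OutputStream with per-block flushing disabled so the caller controls when data reaches the stream. A minimal sketch under those assumptions; the class and method names are hypothetical.

import java.io.IOException;
import java.io.OutputStream;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecord;

public class StreamingAvroWriterSketch {
    // The caller owns the returned writer and must close it to finalize the container.
    public static DataFileWriter<IndexedRecord> open(IndexedRecord first, OutputStream out)
            throws IOException {
        Schema schema = first.getSchema();
        DatumWriter<IndexedRecord> datumWriter;
        if (first instanceof SpecificRecord) {
            // generated classes are written with the specific writer
            datumWriter = new SpecificDatumWriter<>(schema);
        } else {
            datumWriter = new GenericDatumWriter<>(schema);
        }
        DataFileWriter<IndexedRecord> writer = new DataFileWriter<>(datumWriter);
        writer.create(schema, out);
        // buffer whole blocks; an explicit flush() decides when bytes hit the stream
        writer.setFlushOnEveryBlock(false);
        return writer;
    }
}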

Example 90 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project presto by prestodb.

the class KafkaAvroSmokeTest method convertRecordToAvro.

private static byte[] convertRecordToAvro(Schema schema, Map<String, Object> values) {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    GenericData.Record record = new GenericData.Record(schema);
    values.forEach(record::put);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        dataFileWriter.create(schema, outputStream);
        dataFileWriter.append(record);
        dataFileWriter.close();
    } catch (IOException e) {
        throw new UncheckedIOException("Failed to convert to Avro.", e);
    }
    return outputStream.toByteArray();
}
Also used: DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) UncheckedIOException(java.io.UncheckedIOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) GenericData(org.apache.avro.generic.GenericData)
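
A minimal sketch of the inverse, decoding the container bytes back into a GenericRecord with DataFileStream, assuming the bytes came from convertRecordToAvro above; the names are illustrative, not Presto's.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UncheckedIOException;

import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class AvroBytesDecoder {
    public static GenericRecord convertAvroToRecord(byte[] avroBytes) {
        try (DataFileStream<GenericRecord> stream = new DataFileStream<>(
                new ByteArrayInputStream(avroBytes), new GenericDatumReader<GenericRecord>())) {
            // convertRecordToAvro appended exactly one record
            return stream.next();
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to convert from Avro.", e);
        }
    }
}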

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter): 102
GenericRecord (org.apache.avro.generic.GenericRecord): 58
Schema (org.apache.avro.Schema): 50
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 47
File (java.io.File): 38
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 22
IOException (java.io.IOException): 22
GenericData (org.apache.avro.generic.GenericData): 17
FileOutputStream (java.io.FileOutputStream): 15
Test (org.junit.Test): 14
HashMap (java.util.HashMap): 11
InputStream (java.io.InputStream): 10
SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 10
ArrayList (java.util.ArrayList): 9
Path (org.apache.hadoop.fs.Path): 9
ByteArrayInputStream (java.io.ByteArrayInputStream): 8
OutputStream (java.io.OutputStream): 8
ByteBuffer (java.nio.ByteBuffer): 7
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 7
MockFlowFile (org.apache.nifi.util.MockFlowFile): 7