Search in sources:

Example 56 with GenericDatumWriter

Use of org.apache.avro.generic.GenericDatumWriter in project databus by LinkedIn.

The class RelayEventGenerator defines the method populateEvents:

/**
 * Serializes an Avro record and appends it as a single event to the event buffer.
 *
 * @param source         source name, used only in the error log message
 * @param id             source id stamped on the appended event
 * @param record         Avro record to serialize; if {@code null}, nothing is appended
 * @param key            event key; if {@code null}, nothing is appended
 * @param schemaId       schema fingerprint stored alongside the event payload
 * @param statsCollector statistics collector passed through to the buffer append
 * @param buffer         destination event buffer
 * @return 1 if an event was appended, 0 otherwise (null inputs or serialization failure)
 */
int populateEvents(String source, short id, GenericRecord record, DbusEventKey key, byte[] schemaId, DbusEventsStatisticsCollector statsCollector, DbusEventBufferAppendable buffer) {
    if (record != null && key != null) {
        try {
            // Serialize the row
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            Encoder encoder = new BinaryEncoder(bos);
            GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
            writer.write(record, encoder);
            // Flush so any bytes still buffered inside the encoder reach the
            // stream before we snapshot it; otherwise the payload can be truncated.
            encoder.flush();
            byte[] serializedValue = bos.toByteArray();
            short pPartitionId = RngUtils.randomPositiveShort();
            short lPartitionId = RngUtils.randomPositiveShort();
            // Buffer expects nanosecond timestamps; currentTimeMillis is milliseconds.
            long timeStamp = System.currentTimeMillis() * 1000000;
            buffer.appendEvent(key, pPartitionId, lPartitionId, timeStamp, id, schemaId, serializedValue, false, statsCollector);
            return 1;
        } catch (IOException io) {
            // Log the exception itself as well, so the stack trace is not lost.
            LOG.error("Cannot create byte stream payload: " + source, io);
        }
    }
    return 0;
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) Encoder(org.apache.avro.io.Encoder) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 57 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project flink by apache.

The class AvroOutputFormat defines the method open:

/**
 * Opens the output format: picks the right Avro {@link DatumWriter} for the
 * configured value type (specific record, generic record, or reflected POJO),
 * resolves the schema, and starts an Avro container file on the output stream.
 *
 * @param taskNumber index of this parallel task
 * @param numTasks   total number of parallel tasks
 * @throws IOException if the underlying stream or file writer cannot be created
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    super.open(taskNumber, numTasks);
    DatumWriter<E> datumWriter;
    Schema schema;
    if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
        datumWriter = new SpecificDatumWriter<E>(avroValueType);
        try {
            // Specific records carry their schema; instantiate one to read it.
            schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
        } catch (InstantiationException | IllegalAccessException e) {
            // Preserve the original exception as the cause instead of only its message.
            throw new RuntimeException("Could not instantiate avro value type " + avroValueType.getName(), e);
        }
    } else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
        // Generic records have no compiled-in schema; the user must supply one.
        if (userDefinedSchema == null) {
            throw new IllegalStateException("Schema must be set when using Generic Record");
        }
        datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
        schema = userDefinedSchema;
    } else {
        // Fall back to reflection for arbitrary POJOs.
        datumWriter = new ReflectDatumWriter<E>(avroValueType);
        schema = ReflectData.get().getSchema(avroValueType);
    }
    dataFileWriter = new DataFileWriter<E>(datumWriter);
    if (codec != null) {
        dataFileWriter.setCodec(codec.getCodecFactory());
    }
    // A user-defined schema, when present, always wins over the derived one.
    if (userDefinedSchema == null) {
        dataFileWriter.create(schema, stream);
    } else {
        dataFileWriter.create(userDefinedSchema, stream);
    }
}
Also used : Schema(org.apache.avro.Schema) ReflectDatumWriter(org.apache.avro.reflect.ReflectDatumWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter)

Example 58 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project flink by apache.

The class AvroRowDataDeSerializationSchemaTest defines the method testSerializeDeserialize:

/**
 * Round-trip test: builds a generic Avro record covering every supported
 * logical type, serializes it with a plain {@link GenericDatumWriter},
 * deserializes it to {@code RowData} and re-serializes it, asserting that the
 * bytes survive the round trip unchanged.
 */
@Test
public void testSerializeDeserialize() throws Exception {
    final DataType dataType = ROW(FIELD("bool", BOOLEAN()), FIELD("tinyint", TINYINT()), FIELD("smallint", SMALLINT()), FIELD("int", INT()), FIELD("bigint", BIGINT()), FIELD("float", FLOAT()), FIELD("double", DOUBLE()), FIELD("name", STRING()), FIELD("bytes", BYTES()), FIELD("decimal", DECIMAL(19, 6)), FIELD("doubles", ARRAY(DOUBLE())), FIELD("time", TIME(0)), FIELD("date", DATE()), FIELD("timestamp3", TIMESTAMP(3)), FIELD("timestamp3_2", TIMESTAMP(3)), FIELD("map", MAP(STRING(), BIGINT())), FIELD("map2map", MAP(STRING(), MAP(STRING(), INT()))), FIELD("map2array", MAP(STRING(), ARRAY(INT()))), FIELD("nullEntryMap", MAP(STRING(), STRING()))).notNull();
    final RowType rowType = (RowType) dataType.getLogicalType();
    final Schema schema = AvroSchemaConverter.convertToSchema(rowType);

    // --- fixture collections ------------------------------------------------
    List<Double> doubleValues = new ArrayList<>();
    doubleValues.add(1.2);
    doubleValues.add(3.4);
    doubleValues.add(567.8901);

    Map<String, Long> longMap = new HashMap<>();
    longMap.put("flink", 12L);
    longMap.put("avro", 23L);

    Map<String, Integer> inner = new HashMap<>();
    inner.put("inner_key1", 123);
    inner.put("inner_key2", 234);
    Map<String, Map<String, Integer>> nestedMap = new HashMap<>();
    nestedMap.put("outer_key", inner);

    Map<String, List<Integer>> arrayMap = new HashMap<>();
    arrayMap.put("list1", Arrays.asList(1, 2, 3, 4, 5, 6));
    arrayMap.put("list2", Arrays.asList(11, 22, 33, 44, 55));

    // A map entry with a null value, to exercise nullable map values.
    Map<String, String> nullableMap = new HashMap<>();
    nullableMap.put("key1", null);

    // --- populate the record field-by-field ---------------------------------
    final GenericRecord record = new GenericData.Record(schema);
    record.put(0, true);
    record.put(1, (int) Byte.MAX_VALUE);
    record.put(2, (int) Short.MAX_VALUE);
    record.put(3, 33);
    record.put(4, 44L);
    record.put(5, 12.34F);
    record.put(6, 23.45);
    record.put(7, "hello avro");
    record.put(8, ByteBuffer.wrap(new byte[] { 1, 2, 4, 5, 6, 7, 8, 12 }));
    record.put(9, ByteBuffer.wrap(BigDecimal.valueOf(123456789, 6).unscaledValue().toByteArray()));
    record.put(10, doubleValues);
    record.put(11, 18397);
    record.put(12, 10087);
    record.put(13, 1589530213123L);
    record.put(14, 1589530213122L);
    record.put(15, longMap);
    record.put(16, nestedMap);
    record.put(17, arrayMap);
    record.put(18, nullableMap);

    // --- serialize with vanilla Avro, then round-trip through Flink ---------
    AvroRowDataSerializationSchema serializationSchema = createSerializationSchema(dataType);
    AvroRowDataDeserializationSchema deserializationSchema = createDeserializationSchema(dataType);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(schema);
    Encoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
    datumWriter.write(record, encoder);
    encoder.flush();
    byte[] input = bos.toByteArray();

    RowData rowData = deserializationSchema.deserialize(input);
    byte[] output = serializationSchema.serialize(rowData);
    assertArrayEquals(input, output);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) RowType(org.apache.flink.table.types.logical.RowType) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) Encoder(org.apache.avro.io.Encoder) DataType(org.apache.flink.table.types.DataType) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) LogicalTimeRecord(org.apache.flink.formats.avro.generated.LogicalTimeRecord) List(java.util.List) ArrayList(java.util.ArrayList) GenericRecord(org.apache.avro.generic.GenericRecord) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 59 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project beam by apache.

The class AvroByteReaderTest defines the method initInputFile:

/**
 * Write input elements to a file and return information about the Avro-encoded file.
 *
 * <p>Each inner list of {@code elemsList} is written as its own Avro block
 * (separated by {@code sync()}), so block boundaries are deterministic.
 *
 * @param elemsList elements to write, grouped by desired Avro block
 * @param coder     coder used to encode each element to bytes
 * @return file name, sync points, element sizes, and total encoded size
 */
private <T> AvroFileInfo<T> initInputFile(List<List<T>> elemsList, Coder<T> coder) throws Exception {
    File tmpFile = tmpFolder.newFile("file.avro");
    AvroFileInfo<T> fileInfo = new AvroFileInfo<>();
    fileInfo.filename = tmpFile.getPath();
    Schema schema = Schema.create(Schema.Type.BYTES);
    DatumWriter<ByteBuffer> datumWriter = new GenericDatumWriter<>(schema);
    // Keep the raw stream inside try-with-resources so it is closed even if
    // fileWriter.create(...) throws before the writer takes ownership of it.
    // (DataFileWriter.close also closes the stream; the extra close is a no-op.)
    try (OutputStream outStream = Channels.newOutputStream(FileSystems.create(FileSystems.matchNewResource(fileInfo.filename, false), MimeTypes.BINARY));
        DataFileWriter<ByteBuffer> fileWriter = new DataFileWriter<>(datumWriter)) {
        fileWriter.create(schema, outStream);
        boolean first = true;
        for (List<T> elems : elemsList) {
            if (first) {
                first = false;
            } else {
                // Ensure a block boundary here.
                long syncPoint = fileWriter.sync();
                fileInfo.syncPoints.add(syncPoint);
            }
            for (T elem : elems) {
                byte[] encodedElement = CoderUtils.encodeToByteArray(coder, elem);
                fileWriter.append(ByteBuffer.wrap(encodedElement));
                fileInfo.elementSizes.add(encodedElement.length);
                fileInfo.totalElementEncodedSize += encodedElement.length;
            }
        }
    }
    return fileInfo;
}
Also used : OutputStream(java.io.OutputStream) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteBuffer(java.nio.ByteBuffer) File(java.io.File)

Example 60 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project beam by apache.

The class FakeJobService defines the method writeRowsHelper:

/**
 * Writes the given rows to a single Avro shard file whose name is derived from
 * {@code destinationPattern} by substituting the shard number for {@code *}.
 *
 * @param rows               table rows to write
 * @param avroSchema         schema used for both the records and the file header
 * @param destinationPattern filename pattern containing a {@code *} placeholder
 * @param shard              shard index, zero-padded to twelve digits
 */
private void writeRowsHelper(List<TableRow> rows, Schema avroSchema, String destinationPattern, int shard) {
    // Expand the wildcard into a zero-padded, twelve-digit shard number.
    final String shardFilename = destinationPattern.replace("*", String.format("%012d", shard));
    try (WritableByteChannel outputChannel = FileSystems.create(FileSystems.matchNewResource(shardFilename, false), MimeTypes.BINARY);
        DataFileWriter<GenericRecord> avroWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema)).create(avroSchema, Channels.newOutputStream(outputChannel))) {
        for (Map<String, Object> row : rows) {
            // Copy every column of the row into a generic record and append it.
            GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
            for (Map.Entry<String, Object> column : row.entrySet()) {
                builder.set(column.getKey(), column.getValue());
            }
            avroWriter.append(builder.build());
        }
    } catch (IOException e) {
        throw new IllegalStateException(String.format("Could not create destination for extract job %s", shardFilename), e);
    }
}
Also used : WritableByteChannel(java.nio.channels.WritableByteChannel) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) Map(java.util.Map)

Aggregations

GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)127 GenericRecord (org.apache.avro.generic.GenericRecord)105 Schema (org.apache.avro.Schema)69 ByteArrayOutputStream (java.io.ByteArrayOutputStream)57 DataFileWriter (org.apache.avro.file.DataFileWriter)47 File (java.io.File)40 Test (org.junit.Test)37 IOException (java.io.IOException)29 BinaryEncoder (org.apache.avro.io.BinaryEncoder)29 MockFlowFile (org.apache.nifi.util.MockFlowFile)25 Encoder (org.apache.avro.io.Encoder)23 TestRunner (org.apache.nifi.util.TestRunner)20 HashMap (java.util.HashMap)14 ByteArrayOutputStream (org.apache.nifi.stream.io.ByteArrayOutputStream)14 GenericData (org.apache.avro.generic.GenericData)12 ByteArrayInputStream (java.io.ByteArrayInputStream)11 FileOutputStream (java.io.FileOutputStream)10 InputStream (java.io.InputStream)9 ArrayList (java.util.ArrayList)8 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)8