Search in sources: usage examples of org.apache.avro.generic.GenericDatumWriter

Example 1 with GenericDatumWriter

Use of org.apache.avro.generic.GenericDatumWriter in project druid by druid-io.

From the class InlineSchemaAvroBytesDecoderTest, method testParse:

@Test
public void testParse() throws Exception {
    GenericRecord someAvroDatum = AvroStreamInputRowParserTest.buildSomeAvroDatum();
    Schema schema = SomeAvroDatum.getClassSchema();
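    // Serialize the record as raw Avro binary: no container file, the schema travels out of band.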
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    writer.write(someAvroDatum, EncoderFactory.get().directBinaryEncoder(out, null));
    GenericRecord actual = new InlineSchemaAvroBytesDecoder(schema).parse(ByteBuffer.wrap(out.toByteArray()));
    Assert.assertEquals(someAvroDatum.get("id"), actual.get("id"));
}
Also used: Schema (org.apache.avro.Schema), ByteArrayOutputStream (java.io.ByteArrayOutputStream), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), GenericRecord (org.apache.avro.generic.GenericRecord), Test (org.junit.Test), AvroStreamInputRowParserTest (io.druid.data.input.AvroStreamInputRowParserTest)
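
For reference, the decode half of this round trip (what InlineSchemaAvroBytesDecoder performs internally) can be sketched with a GenericDatumReader. This is a minimal sketch, not druid code; the helper name is illustrative:

// Minimal sketch: decode the bytes written above back into a GenericRecord.
static GenericRecord decode(byte[] bytes, Schema schema) throws IOException {
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
    return reader.read(null, decoder);
}

Also assumed: GenericDatumReader (org.apache.avro.generic.GenericDatumReader), DatumReader (org.apache.avro.io.DatumReader), BinaryDecoder (org.apache.avro.io.BinaryDecoder), DecoderFactory (org.apache.avro.io.DecoderFactory)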

Example 2 with GenericDatumWriter

Use of org.apache.avro.generic.GenericDatumWriter in project druid by druid-io.

From the class SchemaRegistryBasedAvroBytesDecoderTest, method getAvroDatum:

byte[] getAvroDatum(Schema schema, GenericRecord someAvroDatum) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    writer.write(someAvroDatum, EncoderFactory.get().directBinaryEncoder(out, null));
    return out.toByteArray();
}
Also used: ByteArrayOutputStream (java.io.ByteArrayOutputStream), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), GenericRecord (org.apache.avro.generic.GenericRecord)
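
The bytes returned by this helper are bare Avro binary. A schema-registry-based decoder typically expects them framed with a schema identifier; as a hedged illustration only (this follows the Confluent wire-format convention of a 0x0 magic byte plus a 4-byte schema id, which may not match the exact decoder under test), a framing helper could look like:

// Hypothetical helper, not part of the druid test: frame raw Avro bytes in
// the Confluent wire format (magic byte 0x0, then a 4-byte schema id).
static ByteBuffer frameForSchemaRegistry(byte[] avroBytes, int schemaId) {
    ByteBuffer buf = ByteBuffer.allocate(1 + 4 + avroBytes.length);
    buf.put((byte) 0x0);   // magic byte
    buf.putInt(schemaId);  // registry id of the writer schema (illustrative)
    buf.put(avroBytes);    // payload from getAvroDatum(schema, someAvroDatum)
    buf.flip();
    return buf;
}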

Example 3 with GenericDatumWriter

Use of org.apache.avro.generic.GenericDatumWriter in project hive by apache.

From the class AvroContainerOutputFormat, method getHiveRecordWriter:

@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    if (isCompressed) {
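        // Deflate (at the configured level) is the default; any other configured codec name is resolved via CodecFactory.fromString.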
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }
    dfw.create(schema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(dfw);
}
Also used: AvroSerdeException (org.apache.hadoop.hive.serde2.avro.AvroSerdeException), Schema (org.apache.avro.Schema), DataFileWriter (org.apache.avro.file.DataFileWriter), IOException (java.io.IOException), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), CodecFactory (org.apache.avro.file.CodecFactory), GenericRecord (org.apache.avro.generic.GenericRecord)
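
Files produced by this record writer are standard Avro object container files, so they can be read back with DataFileReader, which recovers the writer schema from the file header. A minimal sketch with an illustrative path; this is not Hive code:

// Read back an Avro container file written by the record writer above.
File avroFile = new File("/tmp/part-00000.avro");  // illustrative path
DataFileReader<GenericRecord> reader =
        new DataFileReader<GenericRecord>(avroFile, new GenericDatumReader<GenericRecord>());
try {
    Schema writtenSchema = reader.getSchema();  // schema embedded in the file
    while (reader.hasNext()) {
        GenericRecord record = reader.next();
        // process record
    }
} finally {
    reader.close();
}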

Example 4 with GenericDatumWriter

Use of org.apache.avro.generic.GenericDatumWriter in project pinot by linkedin.

From the class BaseClusterIntegrationTest, method pushAvroIntoKafka:

public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
    for (File avroFile : avroFiles) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
            DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
            BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
            int recordCount = 0;
            List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
            int messagesInThisBatch = 0;
            for (GenericRecord genericRecord : reader) {
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                LOGGER.info("Sending last match of {} records to Kafka", messagesToWrite.size());
                producer.send(messagesToWrite);
            }
            outputStream.close();
            reader.close();
            LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic + " from file " + avroFile.getName());
            int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
            LOGGER.info("Total records written so far " + totalRecordCount);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}
Also used: EncoderFactory (org.apache.avro.io.EncoderFactory), ArrayList (java.util.ArrayList), ByteArrayOutputStream (java.io.ByteArrayOutputStream), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), Properties (java.util.Properties), JSONException (org.json.JSONException), ArchiveException (org.apache.commons.compress.archivers.ArchiveException), SQLException (java.sql.SQLException), IOException (java.io.IOException), Producer (kafka.javaapi.producer.Producer), BinaryEncoder (org.apache.avro.io.BinaryEncoder), ProducerConfig (kafka.producer.ProducerConfig), GenericRecord (org.apache.avro.generic.GenericRecord), KeyedMessage (kafka.producer.KeyedMessage), File (java.io.File)
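
On the consuming side, each message written above is the optional header followed by one binary-encoded record. A minimal decode sketch, assuming the consumer knows both the header length and the writer schema (both are assumptions; this helper is not part of the pinot test):

// Hypothetical consumer-side decode: skip the fixed-length header the
// producer prepended, then decode a single record from the remaining bytes.
static GenericRecord decodeMessage(byte[] message, int headerLength, Schema schema) throws IOException {
    BinaryDecoder decoder = DecoderFactory.get()
            .binaryDecoder(message, headerLength, message.length - headerLength, null);
    return new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
}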

Example 5 with GenericDatumWriter

Use of org.apache.avro.generic.GenericDatumWriter in project pinot by linkedin.

From the class UploadRefreshDeleteIntegrationTest, method generateAndUploadRandomSegment1:

protected void generateAndUploadRandomSegment1(final String segmentName, int rowCount) throws Exception {
    ThreadLocalRandom random = ThreadLocalRandom.current();
    Schema schema = new Schema.Parser().parse(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
    GenericRecord record = new GenericData.Record(schema);
    GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    final File avroFile = new File(_tmpDir, segmentName + ".avro");
    fileWriter.create(schema, avroFile);
    for (int i = 0; i < rowCount; i++) {
        record.put(0, random.nextInt());
        fileWriter.append(record);
    }
    fileWriter.close();
    final int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
    final String TAR_GZ_FILE_EXTENSION = ".tar.gz";
    File segmentTarDir = new File(_tarsDir, segmentName);
    buildSegment(segmentTarDir, avroFile, segmentIndex, segmentName, 0);
    String segmentFileName = segmentName;
    for (String name : segmentTarDir.list()) {
        if (name.endsWith(TAR_GZ_FILE_EXTENSION)) {
            segmentFileName = name;
        }
    }
    File file = new File(segmentTarDir, segmentFileName);
    long segmentLength = file.length();
    final File segmentTarDir1 = new File(_tarsDir, segmentName);
    FileUtils.deleteQuietly(segmentTarDir);
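    // Rebuild the same segment in the background so that the rebuild races the upload below.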
    new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                buildSegment(segmentTarDir1, avroFile, segmentIndex, segmentName, 5);
            } catch (Exception e) {
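                // Ignored: failures of the background rebuild are not asserted by this test.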
            }
        }
    }).start();
    FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, segmentLength, 5, 5);
    avroFile.delete();
    FileUtils.deleteQuietly(segmentTarDir);
}
Also used: Schema (org.apache.avro.Schema), DataFileWriter (org.apache.avro.file.DataFileWriter), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom), GenericRecord (org.apache.avro.generic.GenericRecord), File (java.io.File)
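
The test depends on a dummy.avsc resource it never shows. Since the loop only sets field 0 to a random int, any record schema with a single int field would satisfy it; a plausible shape, written inline for illustration (the actual resource may differ):

// Assumed shape of dummy.avsc: a record with one int field, matching
// record.put(0, random.nextInt()) above. Illustrative, not the real resource.
Schema dummySchema = new Schema.Parser().parse(
        "{\"type\": \"record\", \"name\": \"Dummy\","
        + " \"fields\": [{\"name\": \"value\", \"type\": \"int\"}]}");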

Aggregations

GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 114 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 98
Schema (org.apache.avro.Schema): 61
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 50
DataFileWriter (org.apache.avro.file.DataFileWriter): 41
File (java.io.File): 37
Test (org.junit.Test): 35
BinaryEncoder (org.apache.avro.io.BinaryEncoder): 29
MockFlowFile (org.apache.nifi.util.MockFlowFile): 25
IOException (java.io.IOException): 24
TestRunner (org.apache.nifi.util.TestRunner): 20
Encoder (org.apache.avro.io.Encoder): 19
ByteArrayOutputStream (org.apache.nifi.stream.io.ByteArrayOutputStream): 14
HashMap (java.util.HashMap): 13
FileOutputStream (java.io.FileOutputStream): 10
GenericData (org.apache.avro.generic.GenericData): 10
ArrayList (java.util.ArrayList): 8
Path (org.apache.hadoop.fs.Path): 8
ByteBuffer (java.nio.ByteBuffer): 7
ByteArrayInputStream (java.io.ByteArrayInputStream): 6