Search in sources :

Example 26 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project druid by druid-io.

the class InlineSchemasAvroBytesDecoder method parse.

// Expected layout of an incoming record:
//   byte 1      version marker, static 0x1
//   bytes 2-5   schemaId, read as an int
//   remainder   the avro data itself
@Override
public GenericRecord parse(ByteBuffer bytes) {
    // The header (one version byte plus a four-byte schema id) must be present before reading.
    final boolean headerPresent = bytes.remaining() >= 5;
    if (!headerPresent) {
        throw new ParseException("record must have at least 5 bytes carrying version and schemaId");
    }

    final byte recordVersion = bytes.get();
    if (recordVersion != V1) {
        throw new ParseException("found record of arbitrary version [%s]", recordVersion);
    }

    final int id = bytes.getInt();
    final Schema resolvedSchema = schemaObjs.get(id);
    if (resolvedSchema == null) {
        throw new ParseException("Failed to find schema for id [%s]", id);
    }

    try {
        // The two relative reads above advanced the buffer, so only the bytes after the
        // header are handed to the decoder.
        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(resolvedSchema);
        final ByteBufferInputStream payload = new ByteBufferInputStream(Collections.singletonList(bytes));
        return datumReader.read(null, DecoderFactory.get().binaryDecoder(payload, null));
    } catch (Exception e) {
        // Any failure while decoding is rethrown as a ParseException that carries the cause.
        throw new ParseException(e, "Fail to decode avro message with schemaId [%s].", id);
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) ByteBufferInputStream(org.apache.avro.util.ByteBufferInputStream) ParseException(io.druid.java.util.common.parsers.ParseException) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 27 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project druid by druid-io.

the class AvroHadoopInputRowParserTest method buildPigAvro.

/**
 * Round-trips an Avro record through a local Pig job and returns what was written back.
 *
 * The datum is written to a temporary Avro file, loaded into Pig using {@code inputStorage},
 * stored again using {@code outputStorage}, and the first record of the resulting
 * {@code part-m-00000.avro} file is returned. The temporary directory is deleted on the way out.
 *
 * @param datum         record to write; it is serialized with the SomeAvroDatum class schema
 * @param inputStorage  text placed after {@code USING} in the Pig LOAD statement
 * @param outputStorage value passed as the third argument to {@code PigServer.store}
 * @return the first record read from the file stored by Pig
 * @throws IOException if any of the file or Pig operations fails with an I/O error
 */
private static GenericRecord buildPigAvro(GenericRecord datum, String inputStorage, String outputStorage) throws IOException {
    final File tmpDir = Files.createTempDir();
    FileReader<GenericRecord> reader = null;
    PigServer pigServer = null;
    try {
        // 0. write avro object into temp file.
        File someAvroDatumFile = new File(tmpDir, "someAvroDatum.avro");
        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
        try {
            dataFileWriter.create(SomeAvroDatum.getClassSchema(), someAvroDatumFile);
            dataFileWriter.append(datum);
        } finally {
            // Close even when create/append fails so the writer's file handle is not leaked.
            dataFileWriter.close();
        }
        // 1. read avro files into Pig
        pigServer = new PigServer(ExecType.LOCAL);
        pigServer.registerQuery(String.format("A = LOAD '%s' USING %s;", someAvroDatumFile, inputStorage));
        // 2. write new avro file using AvroStorage
        File outputDir = new File(tmpDir, "output");
        pigServer.store("A", String.valueOf(outputDir), outputStorage);
        // 3. read avro object from AvroStorage
        reader = DataFileReader.openReader(new File(outputDir, "part-m-00000.avro"), new GenericDatumReader<GenericRecord>());
        return reader.next();
    } finally {
        try {
            if (pigServer != null) {
                pigServer.shutdown();
            }
        } finally {
            // Runs even if shutdown() throws, so the reader is closed and the temp directory removed.
            Closeables.close(reader, true);
            FileUtils.deleteDirectory(tmpDir);
        }
    }
}
Also used : PigServer(org.apache.pig.PigServer) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 28 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project druid by druid-io.

the class InlineSchemaAvroBytesDecoderTest method testParse.

// Encodes a datum with a direct binary encoder, decodes it with a decoder built from the
// same schema, and checks that the "id" field survives the round trip.
@Test
public void testParse() throws Exception {
    final Schema schema = SomeAvroDatum.getClassSchema();
    final GenericRecord expected = AvroStreamInputRowParserTest.buildSomeAvroDatum();

    final ByteArrayOutputStream encoded = new ByteArrayOutputStream();
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    datumWriter.write(expected, EncoderFactory.get().directBinaryEncoder(encoded, null));

    final InlineSchemaAvroBytesDecoder decoder = new InlineSchemaAvroBytesDecoder(schema);
    final ByteBuffer payload = ByteBuffer.wrap(encoded.toByteArray());
    final GenericRecord decoded = decoder.parse(payload);

    Assert.assertEquals(expected.get("id"), decoded.get("id"));
}
Also used : Schema(org.apache.avro.Schema) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test) AvroStreamInputRowParserTest(io.druid.data.input.AvroStreamInputRowParserTest)

Example 29 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project druid by druid-io.

the class SchemaRegistryBasedAvroBytesDecoderTest method testParse.

// Frames an encoded datum with a leading 0 byte and the int id 1234, stubs the registry to
// return the schema for that id, and checks that the decoded record carries the same "id".
@Test
public void testParse() throws Exception {
    // Given
    when(registry.getByID(eq(1234))).thenReturn(SomeAvroDatum.getClassSchema());
    final GenericRecord expected = AvroStreamInputRowParserTest.buildSomeAvroDatum();
    final Schema schema = SomeAvroDatum.getClassSchema();
    final byte[] payload = getAvroDatum(schema, expected);

    // Five extra bytes: one leading byte followed by the four-byte id.
    final ByteBuffer framed = ByteBuffer.allocate(payload.length + 5);
    framed.put((byte) 0);
    framed.putInt(1234);
    framed.put(payload);
    framed.rewind();

    // When
    final SchemaRegistryBasedAvroBytesDecoder decoder = new SchemaRegistryBasedAvroBytesDecoder(registry);
    final GenericRecord decoded = decoder.parse(framed);

    // Then
    Assert.assertEquals(expected.get("id"), decoded.get("id"));
}
Also used : Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test) AvroStreamInputRowParserTest(io.druid.data.input.AvroStreamInputRowParserTest)

Example 30 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project druid by druid-io.

the class SchemaRegistryBasedAvroBytesDecoderTest method getAvroDatum.

// Serializes the given record with a direct binary encoder, using the supplied schema,
// and returns the bytes that were written.
byte[] getAvroDatum(Schema schema, GenericRecord someAvroDatum) throws IOException {
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    new GenericDatumWriter<GenericRecord>(schema)
        .write(someAvroDatum, EncoderFactory.get().directBinaryEncoder(buffer, null));
    return buffer.toByteArray();
}
Also used : ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord): 262, Schema (org.apache.avro.Schema): 101, Test (org.junit.Test): 80, GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 46, File (java.io.File): 35, IOException (java.io.IOException): 34, GenericData (org.apache.avro.generic.GenericData): 30, GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 30, ArrayList (java.util.ArrayList): 29, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 27, DataFileWriter (org.apache.avro.file.DataFileWriter): 20, HashMap (java.util.HashMap): 19, ByteBuffer (java.nio.ByteBuffer): 18, BinaryEncoder (org.apache.avro.io.BinaryEncoder): 17, Field (org.apache.avro.Schema.Field): 14, DataFileStream (org.apache.avro.file.DataFileStream): 14, GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 14, Utf8 (org.apache.avro.util.Utf8): 14, Encoder (org.apache.avro.io.Encoder): 12, DatasetRepository (com.cloudera.cdk.data.DatasetRepository): 11