
Example 21 with GenericDatumReader

Use of org.apache.avro.generic.GenericDatumReader in project druid by druid-io.

Class SchemaRepoBasedAvroBytesDecoder, method parse:

@Override
public GenericRecord parse(ByteBuffer bytes) {
    Pair<SUBJECT, ID> subjectAndId = subjectAndIdConverter.getSubjectAndId(bytes);
    Schema schema = typedRepository.getSchema(subjectAndId.lhs, subjectAndId.rhs);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
    ByteBufferInputStream inputStream = new ByteBufferInputStream(Collections.singletonList(bytes));
    try {
        return reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null));
    } catch (IOException e) {
        throw new ParseException(e, "Fail to decode avro message!");
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) ByteBufferInputStream(org.apache.avro.util.ByteBufferInputStream) IOException(java.io.IOException) ParseException(io.druid.java.util.common.parsers.ParseException) GenericRecord(org.apache.avro.generic.GenericRecord)
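The core pattern above, minus the Druid-specific schema repository, is GenericDatumReader plus a binary decoder. A minimal sketch, assuming the writer schema is already known and payload holds a single binary-encoded datum (the class and method names here are illustrative, not from the Druid source):

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;

public class BinaryDatumDecode {
    // Decode a single datum that was written with `schema` using Avro binary encoding.
    static GenericRecord decode(Schema schema, byte[] payload) throws IOException {
        DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(payload, null);
        return reader.read(null, decoder);
    }
}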

Example 22 with GenericDatumReader

Use of org.apache.avro.generic.GenericDatumReader in project h2o-3 by h2oai.

Class AvroParser, method runOnPreview:

static <T> T runOnPreview(byte[] bits, AvroPreviewProcessor<T> processor) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    SeekableByteArrayInput sbai = new SeekableByteArrayInput(bits);
    DataFileReader<GenericRecord> dataFileReader = null;
    try {
        dataFileReader = new DataFileReader<>(sbai, datumReader);
        int headerLen = (int) dataFileReader.previousSync();
        byte[] header = Arrays.copyOf(bits, headerLen);
        if (dataFileReader.hasNext()) {
            GenericRecord gr = dataFileReader.next();
            return processor.process(header, gr, dataFileReader.getBlockCount(), dataFileReader.getBlockSize());
        } else {
            throw new RuntimeException("Empty Avro file - cannot run preview! ");
        }
    } finally {
        try {
            if (dataFileReader != null)
                dataFileReader.close();
        } catch (IOException safeToIgnore) {
        }
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput)
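For comparison, reading an entire in-memory Avro container file uses the same DataFileReader and SeekableByteArrayInput combination without the preview-specific header and block bookkeeping. A minimal sketch, assuming bits holds a complete Avro container file (the class name is illustrative):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class ContainerFileRead {
    // Read every record from an Avro container file held in memory.
    static List<GenericRecord> readAll(byte[] bits) throws IOException {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (DataFileReader<GenericRecord> fileReader =
                 new DataFileReader<>(new SeekableByteArrayInput(bits), datumReader)) {
            List<GenericRecord> records = new ArrayList<>();
            for (GenericRecord record : fileReader) {
                records.add(record);
            }
            return records;
        }
    }
}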

Example 23 with GenericDatumReader

Use of org.apache.avro.generic.GenericDatumReader in project h2o-3 by h2oai.

Class AvroParser, method parseChunk:

@Override
protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
    // We will read GenericRecord and load them based on schema
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    final H2OSeekableInputAdaptor sbai = new H2OSeekableInputAdaptor(cidx, din);
    DataFileReader<GenericRecord> dataFileReader = null;
    int cnt = 0;
    try {
        // Reconstruct Avro header
        DataFileStream.Header fakeHeader = new DataFileReader<>(new SeekableByteArrayInput(this.header), datumReader).getHeader();
        dataFileReader = DataFileReader.openReader(sbai, datumReader, fakeHeader, true);
        Schema schema = dataFileReader.getSchema();
        GenericRecord gr = new GenericData.Record(schema);
        Schema.Field[] flatSchema = flatSchema(schema);
        long sync = dataFileReader.previousSync();
        if (sbai.chunkCnt == 0) {
            // Find data in first chunk
            while (dataFileReader.hasNext() && dataFileReader.previousSync() == sync) {
                gr = dataFileReader.next(gr);
                // Write values to the output
            // FIXME: what if the user changes input names, or ignores an input column?
                write2frame(gr, _setup.getColumnNames(), flatSchema, _setup.getColumnTypes(), dout);
                cnt++;
            }
        }
    // else the first chunk does not contain a synchronization block, so give up and let another reader use it
    } catch (Throwable e) {
        e.printStackTrace();
    }
    Log.trace(String.format("Avro: ChunkIdx: %d read %d records, start at %d off, block count: %d, block size: %d", cidx, cnt, din.getChunkDataStart(cidx), dataFileReader.getBlockCount(), dataFileReader.getBlockSize()));
    return dout;
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileStream(org.apache.avro.file.DataFileStream) GenericRecord(org.apache.avro.generic.GenericRecord) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput)
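The interesting step above is reusing a header parsed from the file's leading bytes so that a reader can be opened over a later chunk without re-reading the real header. A minimal sketch of that header-reuse pattern, assuming headerBytes holds the file's leading bytes and data is a SeekableInput over the same file (names are illustrative):

import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class HeaderReuse {
    // Parse the header once from the leading bytes, then open a reader over `data`
    // using that header; the trailing `true` mirrors the h2o-3 call above.
    static DataFileReader<GenericRecord> open(byte[] headerBytes, SeekableInput data) throws IOException {
        GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        DataFileStream.Header header =
            new DataFileReader<>(new SeekableByteArrayInput(headerBytes), datumReader).getHeader();
        return DataFileReader.openReader(data, datumReader, header, true);
    }
}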

Example 24 with GenericDatumReader

Use of org.apache.avro.generic.GenericDatumReader in project haivvreo by jghoman.

Class AvroGenericRecordWritable, method readFields:

@Override
public void readFields(DataInput in) throws IOException {
    Schema schema = Schema.parse(in.readUTF());
    record = new GenericData.Record(schema);
    binaryDecoder = DecoderFactory.defaultFactory().createBinaryDecoder((InputStream) in, binaryDecoder);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
    record = gdr.read(record, binaryDecoder);
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericData(org.apache.avro.generic.GenericData)
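The readFields implementation expects the writer schema as a UTF string followed immediately by the binary-encoded datum. A minimal round-trip sketch of that wire format, using the current (non-deprecated) encoder and decoder factories rather than DecoderFactory.defaultFactory(); class and method names are illustrative:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class SchemaPrefixedRoundTrip {
    // Serialize: the schema JSON as a UTF string, followed by the binary-encoded datum.
    static byte[] write(GenericRecord record) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(baos);
        out.writeUTF(record.getSchema().toString());
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(record.getSchema()).write(record, encoder);
        encoder.flush();
        return baos.toByteArray();
    }

    // Deserialize by reading the schema first and then the datum, mirroring readFields above.
    static GenericRecord read(byte[] bytes) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
        Schema schema = new Schema.Parser().parse(in.readUTF());
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(in, null);
        return new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
    }
}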

Example 25 with GenericDatumReader

Use of org.apache.avro.generic.GenericDatumReader in project haivvreo by jghoman.

Class TestThatEvolvedSchemasActAsWeWant, method resolvedSchemasShouldReturnReaderSchema:

@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
    // Need to verify that when reading a datum with an updated reader schema
    // that the datum then returns the reader schema as its own, since we
    // depend on this behavior in order to avoid re-encoding the datum
    // in the serde.
    String v0 = "{\n" + "    \"namespace\": \"com.linkedin.haivvreo\",\n" + "    \"name\": \"SomeStuff\",\n" + "    \"type\": \"record\",\n" + "    \"fields\": [\n" + "        {\n" + "            \"name\":\"v0\",\n" + "            \"type\":\"string\"\n" + "        }\n" + "    ]\n" + "}";
    String v1 = "{\n" + "    \"namespace\": \"com.linkedin.haivvreo\",\n" + "    \"name\": \"SomeStuff\",\n" + "    \"type\": \"record\",\n" + "    \"fields\": [\n" + "        {\n" + "            \"name\":\"v0\",\n" + "            \"type\":\"string\"\n" + "        },\n" + "        {\n" + "            \"name\":\"v1\",\n" + "            \"type\":\"string\",\n" + "            \"default\":\"v1_default\"" + "        }\n" + "    ]\n" + "}";
    Schema[] schemas = { Schema.parse(v0), Schema.parse(v1) };
    // Encode a schema with v0, write out.
    GenericRecord record = new GenericData.Record(schemas[0]);
    record.put("v0", "v0 value");
    assertTrue(GenericData.get().validate(schemas[0], record));
    // Write datum out to a stream
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    dfw.create(schemas[0], baos);
    dfw.append(record);
    dfw.close();
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(schemas[1]);
    DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
    assertTrue(dfs.hasNext());
    GenericRecord next = dfs.next();
    assertEquals("v0 value", next.get("v0").toString());
    assertEquals("v1_default", next.get("v1").toString());
    // Now the most important check - when we query this record for its schema,
    // we should get back the latest, reader schema:
    assertEquals(schemas[1], next.getSchema());
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) ByteArrayInputStream(java.io.ByteArrayInputStream) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)
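Outside of a container file, the same schema resolution can be requested up front by giving GenericDatumReader both the writer and the reader schema instead of calling setExpected afterwards. A minimal sketch, assuming payload is a single binary-encoded datum written with writerSchema (names are illustrative):

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DecoderFactory;

public class EvolvedRead {
    // Resolve a datum written with `writerSchema` against an evolved `readerSchema`;
    // the returned record reports `readerSchema` as its own schema.
    static GenericRecord readEvolved(Schema writerSchema, Schema readerSchema, byte[] payload) throws IOException {
        GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(writerSchema, readerSchema);
        return reader.read(null, DecoderFactory.get().binaryDecoder(payload, null));
    }
}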

Aggregations

GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 46
GenericRecord (org.apache.avro.generic.GenericRecord): 31
Schema (org.apache.avro.Schema): 20
IOException (java.io.IOException): 15
File (java.io.File): 10
DataFileStream (org.apache.avro.file.DataFileStream): 10
Decoder (org.apache.avro.io.Decoder): 8
ByteArrayInputStream (java.io.ByteArrayInputStream): 7
GenericData (org.apache.avro.generic.GenericData): 7
DataFileReader (org.apache.avro.file.DataFileReader): 6
Test (org.junit.Test): 6
ArrayList (java.util.ArrayList): 5
JsonDecoder (org.apache.avro.io.JsonDecoder): 5
ParseException (io.druid.java.util.common.parsers.ParseException): 4
FileInputStream (java.io.FileInputStream): 4
DataFileWriter (org.apache.avro.file.DataFileWriter): 4
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 4
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 3
Map (java.util.Map): 3
ChannelBufferInputStream (org.jboss.netty.buffer.ChannelBufferInputStream): 3