Use of org.apache.avro.generic.GenericDatumReader in project druid by druid-io.
Class SchemaRepoBasedAvroBytesDecoder, method parse:
@Override
public GenericRecord parse(ByteBuffer bytes) {
  Pair<SUBJECT, ID> subjectAndId = subjectAndIdConverter.getSubjectAndId(bytes);
  Schema schema = typedRepository.getSchema(subjectAndId.lhs, subjectAndId.rhs);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
  ByteBufferInputStream inputStream = new ByteBufferInputStream(Collections.singletonList(bytes));
  try {
    return reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null));
  } catch (IOException e) {
    throw new ParseException(e, "Fail to decode avro message!");
  }
}
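For context, here is a minimal, self-contained round trip over the same raw-binary read path the decoder above uses, with a hard-coded schema standing in for the schema-repo lookup. The Event schema, field name, and class name are made up for illustration, and EncoderFactory is assumed to be available (Avro 1.5+).

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class RawBinaryRoundTrip {
  public static void main(String[] args) throws IOException {
    // Hypothetical schema; a real deployment would fetch this from the schema repository.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Event\",\"fields\":[{\"name\":\"msg\",\"type\":\"string\"}]}");

    // Encode a record as raw Avro binary (no container-file framing).
    GenericRecord record = new GenericData.Record(schema);
    record.put("msg", "hello");
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
    new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
    encoder.flush();

    // Decode it the same way parse() above does once the schema is known.
    GenericRecord decoded = new GenericDatumReader<GenericRecord>(schema)
        .read(null, DecoderFactory.get().binaryDecoder(baos.toByteArray(), null));
    System.out.println(decoded.get("msg"));
  }
}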
Use of org.apache.avro.generic.GenericDatumReader in project h2o-3 by h2oai.
Class AvroParser, method runOnPreview:
static <T> T runOnPreview(byte[] bits, AvroPreviewProcessor<T> processor) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
  SeekableByteArrayInput sbai = new SeekableByteArrayInput(bits);
  DataFileReader<GenericRecord> dataFileReader = null;
  try {
    dataFileReader = new DataFileReader<>(sbai, datumReader);
    int headerLen = (int) dataFileReader.previousSync();
    byte[] header = Arrays.copyOf(bits, headerLen);
    if (dataFileReader.hasNext()) {
      GenericRecord gr = dataFileReader.next();
      return processor.process(header, gr, dataFileReader.getBlockCount(), dataFileReader.getBlockSize());
    } else {
      throw new RuntimeException("Empty Avro file - cannot run preview! ");
    }
  } finally {
    try {
      if (dataFileReader != null)
        dataFileReader.close();
    } catch (IOException safeToIgnore) {
    }
  }
}
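A sketch of how a caller might use runOnPreview. The AvroPreviewProcessor shape shown here is inferred from the processor.process(...) call above and may not match the real h2o-3 interface exactly; the file path is a placeholder.

// Hypothetical shape of the callback, inferred from the call site in runOnPreview.
interface AvroPreviewProcessor<T> {
  T process(byte[] header, GenericRecord firstRecord, long blockCount, long blockSize);
}

// Example caller (somewhere runOnPreview is visible): read a small Avro file
// into memory and extract its schema for a preview.
byte[] bits = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get("sample.avro"));
Schema previewSchema = runOnPreview(bits,
    (header, firstRecord, blockCount, blockSize) -> firstRecord.getSchema());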
Use of org.apache.avro.generic.GenericDatumReader in project h2o-3 by h2oai.
Class AvroParser, method parseChunk:
@Override
protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
  // We will read GenericRecords and load them based on the schema
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  final H2OSeekableInputAdaptor sbai = new H2OSeekableInputAdaptor(cidx, din);
  DataFileReader<GenericRecord> dataFileReader = null;
  int cnt = 0;
  try {
    // Reconstruct the Avro header
    DataFileStream.Header fakeHeader =
        new DataFileReader<>(new SeekableByteArrayInput(this.header), datumReader).getHeader();
    dataFileReader = DataFileReader.openReader(sbai, datumReader, fakeHeader, true);
    Schema schema = dataFileReader.getSchema();
    GenericRecord gr = new GenericData.Record(schema);
    Schema.Field[] flatSchema = flatSchema(schema);
    long sync = dataFileReader.previousSync();
    if (sbai.chunkCnt == 0) {
      // Find data in the first chunk
      while (dataFileReader.hasNext() && dataFileReader.previousSync() == sync) {
        gr = dataFileReader.next(gr);
        // Write values to the output
        // FIXME: what if the user changes input names, or ignores an input column?
        write2frame(gr, _setup.getColumnNames(), flatSchema, _setup.getColumnTypes(), dout);
        cnt++;
      }
    }
    // else the first chunk does not contain a synchronization block, so give up and let another reader use it
  } catch (Throwable e) {
    e.printStackTrace();
  }
  Log.trace(String.format("Avro: ChunkIdx: %d read %d records, start at %d off, block count: %d, block size: %d",
      cidx, cnt, din.getChunkDataStart(cidx), dataFileReader.getBlockCount(), dataFileReader.getBlockSize()));
  return dout;
}
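The fakeHeader trick above lets the reader start mid-file without re-reading the real container header. The standard split-reading pattern it approximates looks roughly like this standalone sketch (not h2o-3 code; the method name, file, and split offsets are made up): sync to the first block at or after the split start and stop once the reader moves past the split end. It uses org.apache.avro.file.SeekableFileInput alongside the DataFileReader already shown.

// A minimal sketch of split-based Avro reading: jump to the first sync marker
// at or after splitStart and read until the reader passes splitEnd.
static long countRecordsInSplit(File avroFile, long splitStart, long splitEnd) throws IOException {
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  try (DataFileReader<GenericRecord> reader =
           new DataFileReader<>(new SeekableFileInput(avroFile), datumReader)) {
    reader.sync(splitStart);
    long cnt = 0;
    GenericRecord reuse = null;
    while (reader.hasNext() && !reader.pastSync(splitEnd)) {
      reuse = reader.next(reuse);
      cnt++;
    }
    return cnt;
  }
}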
Use of org.apache.avro.generic.GenericDatumReader in project haivvreo by jghoman.
Class AvroGenericRecordWritable, method readFields:
@Override
public void readFields(DataInput in) throws IOException {
  Schema schema = Schema.parse(in.readUTF());
  record = new GenericData.Record(schema);
  binaryDecoder = DecoderFactory.defaultFactory().createBinaryDecoder((InputStream) in, binaryDecoder);
  GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
  record = gdr.read(record, binaryDecoder);
}
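For completeness, a sketch of the matching write side of the Writable (not necessarily haivvreo's exact implementation). It assumes an Avro version that provides EncoderFactory and mirrors readFields by writing the schema as UTF before the binary-encoded datum.

@Override
public void write(DataOutput out) throws IOException {
  // Schema first, so readFields() above can rebuild the record container.
  out.writeUTF(record.getSchema().toString());
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(record.getSchema());
  // Assumes 'out' is backed by an OutputStream, mirroring the (InputStream) cast in readFields.
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder((OutputStream) out, null);
  gdw.write(record, encoder);
  encoder.flush();
}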
Use of org.apache.avro.generic.GenericDatumReader in project haivvreo by jghoman.
Class TestThatEvolvedSchemasActAsWeWant, method resolvedSchemasShouldReturnReaderSchema:
@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
  // Need to verify that when reading a datum with an updated reader schema,
  // the datum then returns the reader schema as its own, since we depend on
  // this behavior in order to avoid re-encoding the datum in the serde.
  String v0 = "{\n" +
      " \"namespace\": \"com.linkedin.haivvreo\",\n" +
      " \"name\": \"SomeStuff\",\n" +
      " \"type\": \"record\",\n" +
      " \"fields\": [\n" +
      " {\n" +
      " \"name\":\"v0\",\n" +
      " \"type\":\"string\"\n" +
      " }\n" +
      " ]\n" +
      "}";
  String v1 = "{\n" +
      " \"namespace\": \"com.linkedin.haivvreo\",\n" +
      " \"name\": \"SomeStuff\",\n" +
      " \"type\": \"record\",\n" +
      " \"fields\": [\n" +
      " {\n" +
      " \"name\":\"v0\",\n" +
      " \"type\":\"string\"\n" +
      " },\n" +
      " {\n" +
      " \"name\":\"v1\",\n" +
      " \"type\":\"string\",\n" +
      " \"default\":\"v1_default\"" +
      " }\n" +
      " ]\n" +
      "}";
  Schema[] schemas = { Schema.parse(v0), Schema.parse(v1) };
  // Encode a record with v0, then write it out.
  GenericRecord record = new GenericData.Record(schemas[0]);
  record.put("v0", "v0 value");
  assertTrue(GenericData.get().validate(schemas[0], record));
  // Write the datum out to a stream
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  dfw.create(schemas[0], baos);
  dfw.append(record);
  dfw.close();
  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
  gdr.setExpected(schemas[1]);
  DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
  assertTrue(dfs.hasNext());
  GenericRecord next = dfs.next();
  assertEquals("v0 value", next.get("v0").toString());
  assertEquals("v1_default", next.get("v1").toString());
  // Now the most important check - when we query this record for its schema,
  // we should get back the latest, reader schema:
  assertEquals(schemas[1], next.getSchema());
}
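The same resolution can be exercised without a container file by handing GenericDatumReader the writer and reader schemas directly. A minimal sketch reusing the v0/v1 schemas and record from the test above; it assumes EncoderFactory and DecoderFactory from a reasonably recent Avro.

ByteArrayOutputStream rawOut = new ByteArrayOutputStream();
BinaryEncoder enc = EncoderFactory.get().binaryEncoder(rawOut, null);
new GenericDatumWriter<GenericRecord>(schemas[0]).write(record, enc);
enc.flush();

// writer schema = v0, reader schema = v1
GenericDatumReader<GenericRecord> resolving = new GenericDatumReader<GenericRecord>(schemas[0], schemas[1]);
GenericRecord evolved = resolving.read(null, DecoderFactory.get().binaryDecoder(rawOut.toByteArray(), null));
// evolved.get("v1") is filled from the reader schema's default, "v1_default",
// and evolved.getSchema() is the reader schema, schemas[1].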