Use of org.apache.avro.specific.SpecificDatumReader in project flink by apache, in the class RollingSinkITCase, method testNonRollingAvroKeyValueWithCompressionWriter:
/**
 * This tests {@link AvroKeyValueSinkWriter}
 * with non-rolling output and with compression.
 */
@Test
public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(NUM_ELEMENTS))
        .broadcast()
        .filter(new OddEvenFilter());
    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Type.INT);
    Schema valueSchema = Schema.create(Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);
    RollingSink<Tuple2<Integer, String>> sink = new RollingSink<Tuple2<Integer, String>>(outPath)
        .setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
        .setBucketer(new NonRollingBucketer())
        .setPartPrefix("part")
        .setPendingPrefix("")
        .setPendingSuffix("");
    source.addSink(sink);
    env.execute("RollingSink Avro KeyValue Writer Test");
    GenericData.setStringType(valueSchema, StringType.String);
    Schema elementSchema = AvroKeyValue.getSchema(keySchema, valueSchema);
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<GenericRecord>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(inStream, elementReader);
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValue<Integer, String>(dataFileStream.next());
        int key = wrappedEntry.getKey().intValue();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    dataFileStream = new DataFileStream<GenericRecord>(inStream, elementReader);
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValue<Integer, String>(dataFileStream.next());
        int key = wrappedEntry.getKey().intValue();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
}
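The read-back pattern this test relies on can be isolated into a minimal sketch. The class name and the local file path below are illustrative assumptions, not part of the Flink test: a SpecificDatumReader constructed without an expected schema adopts the writer schema stored in the container-file header, and each key/value pair comes back as a GenericRecord with "key" and "value" fields.

import java.io.File;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.specific.SpecificDatumReader;

public class ReadAvroKeyValueFile {
    public static void main(String[] args) throws Exception {
        // Hypothetical local copy of one part file written by an AvroKeyValueSinkWriter-style writer.
        File partFile = new File("part-0-0");
        // No expected schema: the reader picks up the writer schema from the file header.
        SpecificDatumReader<GenericRecord> datumReader = new SpecificDatumReader<>();
        try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(partFile, datumReader)) {
            for (GenericRecord pair : fileReader) {
                // AvroKeyValue pairs are records with "key" and "value" fields.
                System.out.println(pair.get("key") + " -> " + pair.get("value"));
            }
        }
    }
}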
Use of org.apache.avro.specific.SpecificDatumReader in project flink by apache, in the class AvroInputFormat, method initReader:
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;
    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
            ? new SpecificDatumReader<E>(avroValueType)
            : new ReflectDatumReader<E>(avroValueType);
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }
    SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
    DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }
    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
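The reader-selection logic at the top of initReader is self-contained enough to factor into a small helper. The following is a minimal sketch under our own class and method names (not Flink's): generic for GenericRecord, specific for generated SpecificRecordBase subclasses, reflect-based for everything else.

import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecordBase;

public final class DatumReaders {

    // Pick a DatumReader the same way AvroInputFormat#initReader does.
    public static <E> DatumReader<E> forType(Class<E> type) {
        if (GenericRecord.class == type) {
            return new GenericDatumReader<E>();
        }
        return SpecificRecordBase.class.isAssignableFrom(type)
            ? new SpecificDatumReader<E>(type)
            : new ReflectDatumReader<E>(type);
    }

    private DatumReaders() {
    }
}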
Use of org.apache.avro.specific.SpecificDatumReader in project databus by linkedin, in the class DbusEventAvroDecoder, method getTypedValue:
@Override
public <T extends SpecificRecord> T getTypedValue(DbusEvent e, T reuse, Class<T> targetClass) {
    if (null == reuse) {
        try {
            reuse = targetClass.newInstance();
        } catch (InstantiationException e1) {
            LOG.error("getTypedValue class instantiation error (" + e1.getMessage() + ") for event " + e, e1);
            return null;
        } catch (IllegalAccessException e1) {
            LOG.error("getTypedValue access error (" + e1.getMessage() + ") for event " + e, e1);
            return null;
        }
    }
    byte[] md5 = new byte[16];
    e.schemaId(md5);
    SchemaId schemaId = new SchemaId(md5);
    VersionedSchema writerSchema = _schemaSet.getById(schemaId);
    if (null == writerSchema) {
        LOG.error("Unable to find schema for id " + schemaId + "; event = " + e);
        throw new DatabusRuntimeException("No schema available to decode event " + e);
    }
    ByteBuffer valueBuffer = e.value();
    byte[] valueBytes = new byte[valueBuffer.remaining()];
    valueBuffer.get(valueBytes);
    try {
        //JsonDecoder jsonDec = new JsonDecoder(sourceSchema.getSchema(), new ByteArrayInputStream(valueBytes));
        binDecoder.set(DecoderFactory.defaultFactory().createBinaryDecoder(valueBytes, binDecoder.get()));
        SpecificDatumReader<SpecificRecord> reader = new SpecificDatumReader<SpecificRecord>(writerSchema.getSchema(), reuse.getSchema());
        return targetClass.cast(reader.read(reuse, binDecoder.get()));
    } catch (IOException e1) {
        LOG.error("getTypedValue IO error (" + e1.getMessage() + ") for event " + e, e1);
    }
    return reuse;
}
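Stripped of the schema registry lookup and the Databus error handling, the decode step is a standard Avro pattern: resolve the writer schema against the reader schema carried by the reuse instance, then read from a BinaryDecoder. The sketch below is ours, not Databus code, and it uses the non-deprecated DecoderFactory.get() rather than DecoderFactory.defaultFactory().

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecord;

public final class BinaryRecordDecoder {

    // Decode a binary payload written with writerSchema into an instance of a generated
    // SpecificRecord class, resolving against the reader schema of the reuse instance.
    public static <T extends SpecificRecord> T decode(byte[] payload,
                                                      Schema writerSchema,
                                                      T reuse) throws IOException {
        SpecificDatumReader<T> reader = new SpecificDatumReader<>(writerSchema, reuse.getSchema());
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(payload, null);
        return reader.read(reuse, decoder);
    }

    private BinaryRecordDecoder() {
    }
}

A call site would look like MyEvent evt = BinaryRecordDecoder.decode(valueBytes, writerSchema, new MyEvent()); where MyEvent stands in for any hypothetical Avro-generated SpecificRecord class.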
Use of org.apache.avro.specific.SpecificDatumReader in project gora by apache, in the class AccumuloStore, method populate:
public ByteSequence populate(Iterator<Entry<Key, Value>> iter, T persistent) throws IOException {
    ByteSequence row = null;
    Map<Utf8, Object> currentMap = null;
    List currentArray = null;
    Text currentFam = null;
    int currentPos = 0;
    Schema currentSchema = null;
    Field currentField = null;
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(new byte[0], null);
    while (iter.hasNext()) {
        Entry<Key, Value> entry = iter.next();
        if (row == null) {
            row = entry.getKey().getRowData();
        }
        byte[] val = entry.getValue().get();
        Field field = fieldMap.get(getFieldName(entry));
        if (currentMap != null) {
            if (currentFam.equals(entry.getKey().getColumnFamily())) {
                currentMap.put(new Utf8(entry.getKey().getColumnQualifierData().toArray()), fromBytes(currentSchema, entry.getValue().get()));
                continue;
            } else {
                persistent.put(currentPos, currentMap);
                currentMap = null;
            }
        } else if (currentArray != null) {
            if (currentFam.equals(entry.getKey().getColumnFamily())) {
                currentArray.add(fromBytes(currentSchema, entry.getValue().get()));
                continue;
            } else {
                persistent.put(currentPos, new GenericData.Array<T>(currentField.schema(), currentArray));
                currentArray = null;
            }
        }
        switch (field.schema().getType()) {
            case MAP:  // first entry only. Next are handled above on the next loop
                currentMap = new DirtyMapWrapper<>(new HashMap<Utf8, Object>());
                currentPos = field.pos();
                currentFam = entry.getKey().getColumnFamily();
                currentSchema = field.schema().getValueType();
                currentMap.put(new Utf8(entry.getKey().getColumnQualifierData().toArray()), fromBytes(currentSchema, entry.getValue().get()));
                break;
            case ARRAY:
                currentArray = new DirtyListWrapper<>(new ArrayList<>());
                currentPos = field.pos();
                currentFam = entry.getKey().getColumnFamily();
                currentSchema = field.schema().getElementType();
                currentField = field;
                currentArray.add(fromBytes(currentSchema, entry.getValue().get()));
                break;
            case UNION:  // default value of null acts like union with null
                Schema effectiveSchema = field.schema().getTypes().get(firstNotNullSchemaTypeIndex(field.schema()));
                // map and array were coded without union index so need to be read the same way
                if (effectiveSchema.getType() == Type.ARRAY) {
                    currentArray = new DirtyListWrapper<>(new ArrayList<>());
                    currentPos = field.pos();
                    currentFam = entry.getKey().getColumnFamily();
                    currentSchema = field.schema().getElementType();
                    currentField = field;
                    currentArray.add(fromBytes(currentSchema, entry.getValue().get()));
                    break;
                } else if (effectiveSchema.getType() == Type.MAP) {
                    currentMap = new DirtyMapWrapper<>(new HashMap<Utf8, Object>());
                    currentPos = field.pos();
                    currentFam = entry.getKey().getColumnFamily();
                    currentSchema = effectiveSchema.getValueType();
                    currentMap.put(new Utf8(entry.getKey().getColumnQualifierData().toArray()), fromBytes(currentSchema, entry.getValue().get()));
                    break;
                }
                // continue like a regular top-level union
            case RECORD:
                SpecificDatumReader<?> reader = new SpecificDatumReader<Schema>(field.schema());
                persistent.put(field.pos(), reader.read(null, DecoderFactory.get().binaryDecoder(val, decoder)));
                break;
            default:
                persistent.put(field.pos(), fromBytes(field.schema(), entry.getValue().get()));
        }
    }
    if (currentMap != null) {
        persistent.put(currentPos, currentMap);
    } else if (currentArray != null) {
        persistent.put(currentPos, new GenericData.Array<T>(currentField.schema(), currentArray));
    }
    persistent.clearDirty();
    return row;
}
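The RECORD branch is the only place this method touches SpecificDatumReader: the cell value holds a binary-encoded nested record, and the method decodes it with a BinaryDecoder instance that is reused across loop iterations. A minimal standalone sketch of just that step, under our own names rather than Gora's:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificDatumReader;

public final class RecordFieldCodec {

    // Decode one record-typed field from its stored byte[] cell value, reusing the caller's
    // BinaryDecoder buffer the way AccumuloStore#populate does for RECORD fields.
    public static Object readRecordField(byte[] cellValue,
                                         Schema fieldSchema,
                                         BinaryDecoder reusableDecoder) throws IOException {
        SpecificDatumReader<Object> reader = new SpecificDatumReader<>(fieldSchema);
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(cellValue, reusableDecoder);
        return reader.read(null, decoder);
    }

    private RecordFieldCodec() {
    }
}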
Use of org.apache.avro.specific.SpecificDatumReader in project gora by apache, in the class AvroSerializerUtil, method deserializer:
public static Object deserializer(Object value, Schema schema) throws IOException {
    String schemaId = schema.getFullName();
    SpecificDatumReader<?> reader = readerMap.get(schemaId);
    if (reader == null) {
        reader = new SpecificDatumReader(schema);  // ignore dirty bits
        SpecificDatumReader localReader = null;
        if ((localReader = readerMap.putIfAbsent(schemaId, reader)) != null) {
            reader = localReader;
        }
    }
    // initialize a decoder, possibly reusing previous one
    BinaryDecoder decoderFromCache = decoders.get();
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder((byte[]) value, null);
    // put in threadlocal cache if the initial get was empty
    if (decoderFromCache == null) {
        decoders.set(decoder);
    }
    Object result = reader.read(null, decoder);
    return result;
}
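The same caching idea can be written more compactly with computeIfAbsent and an unconditional ThreadLocal update. This is a hedged sketch of the pattern, not Gora's implementation; READERS and DECODERS are our own names.

import java.io.IOException;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificDatumReader;

public final class CachedAvroDeserializer {

    // One reader per schema full name, shared across threads.
    private static final ConcurrentHashMap<String, SpecificDatumReader<Object>> READERS =
        new ConcurrentHashMap<>();

    // One reusable decoder buffer per thread.
    private static final ThreadLocal<BinaryDecoder> DECODERS = new ThreadLocal<>();

    public static Object deserialize(byte[] bytes, Schema schema) throws IOException {
        SpecificDatumReader<Object> reader =
            READERS.computeIfAbsent(schema.getFullName(), k -> new SpecificDatumReader<Object>(schema));
        // Reuse the thread's decoder buffer if one exists, then cache the (possibly new) instance.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, DECODERS.get());
        DECODERS.set(decoder);
        return reader.read(null, decoder);
    }

    private CachedAvroDeserializer() {
    }
}

Unconditionally re-setting the ThreadLocal is a simplification over the original, which only stores the decoder when the slot was empty; both variants keep one reusable decoder per thread.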