Use of org.apache.avro.specific.SpecificDatumReader in project flink by apache.
From the class RollingSinkITCase, method testNonRollingAvroKeyValueWithoutCompressionWriter.
/**
* This tests {@link AvroKeyValueSinkWriter}
* with non-rolling output and without compression.
*/
@Test
public void testNonRollingAvroKeyValueWithoutCompressionWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source = env
        .addSource(new TestSourceFunction(NUM_ELEMENTS))
        .broadcast()
        .filter(new OddEvenFilter());

    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Type.INT);
    Schema valueSchema = Schema.create(Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());

    RollingSink<Tuple2<Integer, String>> sink = new RollingSink<Tuple2<Integer, String>>(outPath)
        .setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
        .setBucketer(new NonRollingBucketer())
        .setPartPrefix("part")
        .setPendingPrefix("")
        .setPendingSuffix("");

    source.addSink(sink);

    env.execute("RollingSink Avro KeyValue Writer Test");

    GenericData.setStringType(valueSchema, StringType.String);
    Schema elementSchema = AvroKeyValue.getSchema(keySchema, valueSchema);

    // The first subtask's part file is expected to contain the even elements.
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<GenericRecord>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(inStream, elementReader);
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValue<Integer, String>(dataFileStream.next());
        int key = wrappedEntry.getKey().intValue();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();

    // The second subtask's part file is expected to contain the odd elements.
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    dataFileStream = new DataFileStream<GenericRecord>(inStream, elementReader);
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValue<Integer, String>(dataFileStream.next());
        int key = wrappedEntry.getKey().intValue();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
}
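The verification pattern above is plain Avro: a SpecificDatumReader wrapped in a DataFileStream over the part file. Below is a minimal, self-contained sketch (not taken from the Flink sources) that reads such a key/value container file from a local path; the file name is hypothetical, and the "key"/"value" field names are an assumption based on the AvroKeyValue convention used by the writer.

import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.specific.SpecificDatumReader;

public class AvroKeyValueFileReadSketch {

    public static void main(String[] args) throws Exception {
        // Hypothetical local copy of a part file produced by the sink.
        InputStream in = new FileInputStream("part-0-0");

        // With no schema given, the reader falls back to the writer schema
        // stored in the container file header.
        SpecificDatumReader<GenericRecord> reader = new SpecificDatumReader<>();
        try (DataFileStream<GenericRecord> stream = new DataFileStream<>(in, reader)) {
            System.out.println("writer schema: " + stream.getSchema());
            for (GenericRecord record : stream) {
                // Each record is a key/value pair; "key" and "value" are the
                // assumed field names of the AvroKeyValue pair schema.
                System.out.println(record.get("key") + " -> " + record.get("value"));
            }
        }
    }
}

The same loop would work against the HDFS stream returned by dfs.open(...) in the test, since DataFileStream only needs an InputStream.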
Use of org.apache.avro.specific.SpecificDatumReader in project flink by apache.
From the class BucketingSinkTest, method testUserDefinedConfiguration.
/**
 * This tests user-defined HDFS configuration.
 */
@Test
public void testUserDefinedConfiguration() throws Exception {
    final String outPath = hdfsURI + "/string-non-rolling-with-config";
    final int numElements = 20;

    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

    // User-defined Hadoop configuration that the writer is expected to see.
    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "40960");

    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
        .setFSConfig(conf)
        .setWriter(new StreamWriterWithConfigCheck<Integer, String>(properties, "io.file.buffer.size", "40960"))
        .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
        .setPartPrefix(PART_PREFIX)
        .setPendingPrefix("")
        .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness = createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);
    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(i, "message #" + Integer.toString(i))));
    }

    testHarness.close();

    // Read the written part file back and verify its contents.
    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));
    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
            new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
}
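For reference, the element schema that AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema) supplies above is a record pairing the key and value schemas. The sketch below builds a comparable pair schema by hand with SchemaBuilder; the record name and namespace are illustrative assumptions mirroring Avro's key/value convention, not values confirmed by the Flink sources.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

public class KeyValuePairSchemaSketch {

    public static void main(String[] args) {
        Schema keySchema = Schema.create(Schema.Type.INT);
        Schema valueSchema = Schema.create(Schema.Type.STRING);

        // A record pairing the key and value schemas; name and namespace are
        // illustrative only.
        Schema pairSchema = SchemaBuilder.record("KeyValuePair")
            .namespace("org.apache.avro.mapreduce")
            .fields()
            .name("key").type(keySchema).noDefault()
            .name("value").type(valueSchema).noDefault()
            .endRecord();

        System.out.println(pairSchema.toString(true));
    }
}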
Use of org.apache.avro.specific.SpecificDatumReader in project flink by apache.
From the class BucketingSinkTest, method testNonRollingAvroKeyValueWithCompressionWriter.
/**
* This tests {@link AvroKeyValueSinkWriter}
* with non-rolling output and with compression.
*/
@Test
public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {
    final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";
    final int numElements = 20;

    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    // Enable Snappy compression for the written Avro container files.
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
        .setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
        .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
        .setPartPrefix(PART_PREFIX)
        .setPendingPrefix("")
        .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness = createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);
    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(i, "message #" + Integer.toString(i))));
    }

    testHarness.close();

    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));
    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
            new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
}
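The CONF_COMPRESS / CONF_COMPRESS_CODEC properties correspond to block compression in the Avro container file format. As an illustration of the underlying mechanism (not the sink's actual implementation), the sketch below writes a Snappy-compressed file with the plain Avro API; it assumes snappy-java is on the classpath, and the schema and file name are invented for the example.

import java.io.File;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class SnappyAvroWriteSketch {

    public static void main(String[] args) throws Exception {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Message\",\"fields\":["
            + "{\"name\":\"key\",\"type\":\"int\"},"
            + "{\"name\":\"value\",\"type\":\"string\"}]}");

        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
        try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
            // Rough equivalent of CONF_COMPRESS=true and CONF_COMPRESS_CODEC=snappy.
            fileWriter.setCodec(CodecFactory.snappyCodec());
            fileWriter.create(schema, new File("messages.avro"));
            for (int i = 0; i < 20; i++) {
                GenericRecord record = new GenericData.Record(schema);
                record.put("key", i);
                record.put("value", "message #" + i);
                fileWriter.append(record);
            }
        }
    }
}

A SpecificDatumReader plus DataFileStream, as used in the tests above, reads such a file back without any extra configuration because the codec is recorded in the file header.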
Use of org.apache.avro.specific.SpecificDatumReader in project camel by apache.
From the class AvroDataFormat, method unmarshal.
public Object unmarshal(Exchange exchange, InputStream inputStream) throws Exception {
    ObjectHelper.notNull(actualSchema, "schema", this);

    // Resolve the generated specific class (if it is on the classpath) so its
    // class loader can be used for the SpecificData instance.
    ClassLoader classLoader = null;
    Class<?> clazz = camelContext.getClassResolver().resolveClass(actualSchema.getFullName());
    if (clazz != null) {
        classLoader = clazz.getClassLoader();
    }

    SpecificData specificData = new SpecificDataNoCache(classLoader);
    DatumReader<GenericRecord> reader = new SpecificDatumReader<GenericRecord>(null, null, specificData);
    reader.setSchema(actualSchema);

    Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
    Object result = reader.read(null, decoder);
    return result;
}
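unmarshal() works on the raw Avro binary encoding, so the reader must be told the schema explicitly via setSchema(); there is no container-file header to read it from. Below is a minimal, standalone sketch of that round trip (not Camel code; the schema and values are invented for the example).

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumReader;

public class BinaryRoundTripSketch {

    public static void main(String[] args) throws Exception {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Person\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"}]}");

        // Encode one record with the raw binary encoding (no file header).
        GenericRecord person = new GenericData.Record(schema);
        person.put("name", "alice");
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(person, encoder);
        encoder.flush();

        // Decode it: raw binary data carries no schema, so the reader must be
        // given one explicitly, analogous to setSchema(actualSchema) above.
        SpecificDatumReader<GenericRecord> reader = new SpecificDatumReader<>();
        reader.setSchema(schema);
        Decoder decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(out.toByteArray()), null);
        System.out.println(reader.read(null, decoder));
    }
}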
Use of org.apache.avro.specific.SpecificDatumReader in project gora by apache.
From the class SolrStore, method deserializeFieldValue.
@SuppressWarnings("unchecked")
private Object deserializeFieldValue(Field field, Schema fieldSchema, Object solrValue, T persistent) throws IOException {
    Object fieldValue = null;
    switch (fieldSchema.getType()) {
        case MAP:
        case ARRAY:
        case RECORD:
            // Complex types are stored in Solr as serialized bytes; deserialize
            // them with a SpecificDatumReader for the field's schema.
            @SuppressWarnings("rawtypes")
            SpecificDatumReader reader = getDatumReader(fieldSchema);
            fieldValue = IOUtils.deserialize((byte[]) solrValue, reader, persistent.get(field.pos()));
            break;
        case ENUM:
            fieldValue = AvroUtils.getEnumValue(fieldSchema, (String) solrValue);
            break;
        case FIXED:
            throw new IOException("???");
            // break;
        case BYTES:
            fieldValue = ByteBuffer.wrap((byte[]) solrValue);
            break;
        case STRING:
            fieldValue = new Utf8(solrValue.toString());
            break;
        case UNION:
            if (fieldSchema.getTypes().size() == 2 && isNullable(fieldSchema)) {
                // Nullable union, i.e. schema ["null","type"] or ["type","null"]:
                // unwrap to the non-null branch and recurse.
                Type type0 = fieldSchema.getTypes().get(0).getType();
                Type type1 = fieldSchema.getTypes().get(1).getType();
                if (!type0.equals(type1)) {
                    if (type0.equals(Schema.Type.NULL)) {
                        fieldSchema = fieldSchema.getTypes().get(1);
                    } else {
                        fieldSchema = fieldSchema.getTypes().get(0);
                    }
                } else {
                    fieldSchema = fieldSchema.getTypes().get(0);
                }
                fieldValue = deserializeFieldValue(field, fieldSchema, solrValue, persistent);
            } else {
                @SuppressWarnings("rawtypes")
                SpecificDatumReader unionReader = getDatumReader(fieldSchema);
                fieldValue = IOUtils.deserialize((byte[]) solrValue, unionReader, persistent.get(field.pos()));
                break;
            }
            break;
        default:
            fieldValue = solrValue;
    }
    return fieldValue;
}
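The UNION branch only unwraps two-type nullable unions before recursing. Below is a small standalone sketch of that unwrapping step, outside Gora; the helper name and the example schema are illustrative only.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

public class NullableUnionSketch {

    // Returns the non-null branch of a two-type nullable union, mirroring the
    // schema selection performed in the UNION case above.
    static Schema unwrapNullable(Schema unionSchema) {
        Schema first = unionSchema.getTypes().get(0);
        Schema second = unionSchema.getTypes().get(1);
        return first.getType() == Schema.Type.NULL ? second : first;
    }

    public static void main(String[] args) {
        // ["null","string"], the usual shape of an optional string field.
        Schema nullableString = SchemaBuilder.unionOf().nullType().and().stringType().endUnion();
        System.out.println(unwrapNullable(nullableString)); // prints "string"
    }
}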