Example 11 with SpecificDatumReader

use of org.apache.avro.specific.SpecificDatumReader in project flink by apache.

the class RollingSinkITCase method testNonRollingAvroKeyValueWithoutCompressionWriter.

/**
	 * This tests {@link AvroKeyValueSinkWriter}
	 * with non-rolling output and without compression.
	 */
@Test
public void testNonRollingAvroKeyValueWithoutCompressionWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());
    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Type.INT);
    Schema valueSchema = Schema.create(Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    RollingSink<Tuple2<Integer, String>> sink = new RollingSink<Tuple2<Integer, String>>(outPath)
            .setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
            .setBucketer(new NonRollingBucketer())
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");
    source.addSink(sink);
    env.execute("RollingSink Avro KeyValue Writer Test");
    GenericData.setStringType(valueSchema, StringType.String);
    Schema elementSchema = AvroKeyValue.getSchema(keySchema, valueSchema);
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<GenericRecord>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(inStream, elementReader);
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValue<Integer, String>(dataFileStream.next());
        int key = wrappedEntry.getKey().intValue();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    dataFileStream = new DataFileStream<GenericRecord>(inStream, elementReader);
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValue<Integer, String>(dataFileStream.next());
        int key = wrappedEntry.getKey().intValue();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) DataFileStream(org.apache.avro.file.DataFileStream) AvroKeyValue(org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter.AvroKeyValue) Tuple2(org.apache.flink.api.java.tuple.Tuple2) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)
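
For quick reference, the read-back half of this test reduces to the following standalone sketch: open the Avro container file and iterate its records with a SpecificDatumReader. The local file name "part-0-0" and the enclosing class are placeholders, not part of the Flink test.

import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter.AvroKeyValue;

public class AvroKeyValueReadBack {
    public static void main(String[] args) throws Exception {
        // Reconstruct the key-value wrapper schema the sink wrote with.
        Schema keySchema = Schema.create(Schema.Type.INT);
        Schema valueSchema = Schema.create(Schema.Type.STRING);
        Schema kvSchema = AvroKeyValue.getSchema(keySchema, valueSchema);
        // DataFileStream decodes the container format; the datum reader
        // materializes each entry as a GenericRecord.
        try (InputStream in = new FileInputStream("part-0-0");
             DataFileStream<GenericRecord> records =
                     new DataFileStream<>(in, new SpecificDatumReader<GenericRecord>(kvSchema))) {
            for (GenericRecord record : records) {
                AvroKeyValue<Integer, String> kv = new AvroKeyValue<>(record);
                System.out.println(kv.getKey() + " -> " + kv.getValue());
            }
        }
    }
}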

Example 12 with SpecificDatumReader

use of org.apache.avro.specific.SpecificDatumReader in project flink by apache.

the class BucketingSinkTest method testUserDefinedConfiguration.

/**
	 * This tests that a user-defined HDFS configuration
	 * is picked up by the sink's writer.
	 */
@Test
public void testUserDefinedConfiguration() throws Exception {
    final String outPath = hdfsURI + "/string-non-rolling-with-config";
    final int numElements = 20;
    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);
    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "40960");
    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
            .setFSConfig(conf)
            .setWriter(new StreamWriterWithConfigCheck<Integer, String>(properties, "io.file.buffer.size", "40960"))
            .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");
    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness = createTestSink(sink, 1, 0);
    testHarness.setProcessingTime(0L);
    testHarness.setup();
    testHarness.open();
    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(i, "message #" + Integer.toString(i))));
    }
    testHarness.close();
    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));
    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) AvroKeyValueSinkWriter(org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter) Path(org.apache.hadoop.fs.Path) DataFileStream(org.apache.avro.file.DataFileStream) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) Tuple2(org.apache.flink.api.java.tuple.Tuple2) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)
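
The interesting part of this test is that setFSConfig hands a user-supplied Hadoop Configuration to the sink. A minimal sketch of that wiring outside the test harness, assuming the same legacy BucketingSink connector; the output path and the `stream` parameter are placeholders.

import java.util.HashMap;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.hadoop.conf.Configuration;

// `stream` is assumed to be an existing DataStream<Tuple2<Integer, String>>.
void attachSink(DataStream<Tuple2<Integer, String>> stream) {
    // Custom HDFS client setting, forwarded to the sink's FileSystem.
    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "40960");

    Map<String, String> writerProps = new HashMap<>();
    writerProps.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA,
            Schema.create(Schema.Type.INT).toString());
    writerProps.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA,
            Schema.create(Schema.Type.STRING).toString());

    BucketingSink<Tuple2<Integer, String>> sink =
            new BucketingSink<Tuple2<Integer, String>>("hdfs:///tmp/avro-out") // placeholder path
                    .setFSConfig(conf)
                    .setWriter(new AvroKeyValueSinkWriter<Integer, String>(writerProps));
    stream.addSink(sink);
}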

Example 13 with SpecificDatumReader

use of org.apache.avro.specific.SpecificDatumReader in project flink by apache.

the class BucketingSinkTest method testNonRollingAvroKeyValueWithCompressionWriter.

/**
	 * This tests {@link AvroKeyValueSinkWriter}
	 * with non-rolling output and with compression.
	 */
@Test
public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {
    final String outPath = hdfsURI + "/avro-kv-comp-non-rolling-out";
    final int numElements = 20;
    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);
    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
            .setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
            .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");
    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness = createTestSink(sink, 1, 0);
    testHarness.setProcessingTime(0L);
    testHarness.setup();
    testHarness.open();
    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(i, "message #" + Integer.toString(i))));
    }
    testHarness.close();
    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));
    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry = new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }
    dataFileStream.close();
    inStream.close();
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) DataFileStream(org.apache.avro.file.DataFileStream) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) Tuple2(org.apache.flink.api.java.tuple.Tuple2) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) AvroKeyValueSinkWriter(org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter) Test(org.junit.Test)
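
One thing the test does not assert is that compression was actually applied. As a hedged addition (not part of the Flink test), the Avro container file records its codec in standard file metadata, so the written part file (name is a placeholder) can be checked like this:

import java.io.FileInputStream;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.specific.SpecificDatumReader;

// No schema is needed up front: DataFileStream reads the writer schema
// from the file header and pushes it into the datum reader.
try (DataFileStream<GenericRecord> in = new DataFileStream<>(
        new FileInputStream("part-0-0"), new SpecificDatumReader<GenericRecord>())) {
    // DataFileConstants.CODEC is the "avro.codec" metadata key.
    System.out.println("codec = " + in.getMetaString(DataFileConstants.CODEC)); // expect "snappy"
}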

Example 14 with SpecificDatumReader

use of org.apache.avro.specific.SpecificDatumReader in project camel by apache.

the class AvroDataFormat method unmarshal.

public Object unmarshal(Exchange exchange, InputStream inputStream) throws Exception {
    ObjectHelper.notNull(actualSchema, "schema", this);
    ClassLoader classLoader = null;
    Class<?> clazz = camelContext.getClassResolver().resolveClass(actualSchema.getFullName());
    if (clazz != null) {
        classLoader = clazz.getClassLoader();
    }
    SpecificData specificData = new SpecificDataNoCache(classLoader);
    DatumReader<GenericRecord> reader = new SpecificDatumReader<GenericRecord>(null, null, specificData);
    reader.setSchema(actualSchema);
    Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
    Object result = reader.read(null, decoder);
    return result;
}
Also used : SpecificData(org.apache.avro.specific.SpecificData) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) Decoder(org.apache.avro.io.Decoder)
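
The write side that produces the bytes this unmarshal expects is the usual Avro mirror image. A minimal sketch using plain Avro (the schema and record arguments are placeholders, and this helper is not part of the Camel API):

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumWriter;

byte[] marshal(Schema schema, GenericRecord record) throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> writer = new SpecificDatumWriter<>(schema);
    // binaryEncoder(out, null) creates a fresh buffered binary encoder.
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    writer.write(record, encoder);
    encoder.flush(); // the encoder buffers; flush before reading the bytes
    return out.toByteArray(); // what unmarshal() above consumes
}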

Example 15 with SpecificDatumReader

use of org.apache.avro.specific.SpecificDatumReader in project gora by apache.

the class SolrStore method deserializeFieldValue.

@SuppressWarnings("unchecked")
private Object deserializeFieldValue(Field field, Schema fieldSchema, Object solrValue, T persistent) throws IOException {
    Object fieldValue = null;
    switch(fieldSchema.getType()) {
        case MAP:
        case ARRAY:
        case RECORD:
            @SuppressWarnings("rawtypes") SpecificDatumReader reader = getDatumReader(fieldSchema);
            fieldValue = IOUtils.deserialize((byte[]) solrValue, reader, persistent.get(field.pos()));
            break;
        case ENUM:
            fieldValue = AvroUtils.getEnumValue(fieldSchema, (String) solrValue);
            break;
        case FIXED:
            throw new IOException("Not supported yet: Avro FIXED type");
        case BYTES:
            fieldValue = ByteBuffer.wrap((byte[]) solrValue);
            break;
        case STRING:
            fieldValue = new Utf8(solrValue.toString());
            break;
        case UNION:
            if (fieldSchema.getTypes().size() == 2 && isNullable(fieldSchema)) {
                // schema [type0, type1]
                Type type0 = fieldSchema.getTypes().get(0).getType();
                Type type1 = fieldSchema.getTypes().get(1).getType();
                // ["null","type"] or ["type","null"]
                if (!type0.equals(type1)) {
                    if (type0.equals(Schema.Type.NULL))
                        fieldSchema = fieldSchema.getTypes().get(1);
                    else
                        fieldSchema = fieldSchema.getTypes().get(0);
                } else {
                    fieldSchema = fieldSchema.getTypes().get(0);
                }
                fieldValue = deserializeFieldValue(field, fieldSchema, solrValue, persistent);
            } else {
                @SuppressWarnings("rawtypes") SpecificDatumReader unionReader = getDatumReader(fieldSchema);
                fieldValue = IOUtils.deserialize((byte[]) solrValue, unionReader, persistent.get(field.pos()));
            }
            break;
        default:
            fieldValue = solrValue;
    }
    return fieldValue;
}
Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) IOException(java.io.IOException) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader)
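
The UNION branch above is easiest to follow in isolation. A minimal sketch of the nullable-union resolution it performs; the helper name `nonNullBranch` and the schema literal in the usage line are illustrative placeholders:

import org.apache.avro.Schema;

// For a two-branch union such as ["null","string"], pick the non-null
// branch; for a same-type union, fall back to the first branch.
Schema nonNullBranch(Schema union) {
    Schema t0 = union.getTypes().get(0);
    Schema t1 = union.getTypes().get(1);
    if (t0.getType().equals(t1.getType())) {
        return t0; // same-type union: take the first branch
    }
    return t0.getType() == Schema.Type.NULL ? t1 : t0;
}

// Usage: resolves to the STRING branch of the union.
Schema effective = nonNullBranch(new Schema.Parser().parse("[\"null\", \"string\"]"));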

Aggregations

SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader): 17
Schema (org.apache.avro.Schema): 6
HashMap (java.util.HashMap): 5
GenericRecord (org.apache.avro.generic.GenericRecord): 5
BinaryDecoder (org.apache.avro.io.BinaryDecoder): 5
Test (org.junit.Test): 5
DataFileStream (org.apache.avro.file.DataFileStream): 4
Utf8 (org.apache.avro.util.Utf8): 4
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 4
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4
Path (org.apache.hadoop.fs.Path): 4
DataFileReader (org.apache.avro.file.DataFileReader): 3
IOException (java.io.IOException): 2
ArrayList (java.util.ArrayList): 2
Type (org.apache.avro.Schema.Type): 2
ReflectDatumReader (org.apache.avro.reflect.ReflectDatumReader): 2
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 2
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 2
AvroKeyValueSinkWriter (org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter): 2
AvroKeyValue (org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter.AvroKeyValue): 2