Search in sources :

Example 11 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project beam by apache.

the class AvroIOTest method testAvroIOCompressedWriteAndReadASingleFile.

/**
 * Writes two {@code GenericClass} values to a single, unsharded Avro file using the
 * deflate codec, reads the file back through the pipeline, and finally opens the file
 * directly to check the codec name recorded in its metadata.
 *
 * @throws Throwable if creating the temp file, running the pipeline, or reading the
 *         output file fails
 */
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testAvroIOCompressedWriteAndReadASingleFile() throws Throwable {
    List<GenericClass> values = ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
    File outputFile = tmpFolder.newFile("output.avro");
    p.apply(Create.of(values)).apply(AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding().withCodec(CodecFactory.deflateCodec(9)));
    p.run();
    PCollection<GenericClass> input = p.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
    PAssert.that(input).containsInAnyOrder(values);
    p.run();
    // Open the file stream as its own resource so it is closed even if the
    // DataFileStream constructor throws while parsing the header; both streams
    // are released before the temp folder is cleaned up.
    try (FileInputStream fileIn = new FileInputStream(outputFile);
        DataFileStream<Object> dataFileStream = new DataFileStream<>(fileIn, new GenericDatumReader<Object>())) {
        assertEquals("deflate", dataFileStream.getMetaString("avro.codec"));
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileStream(org.apache.avro.file.DataFileStream) File(java.io.File) FileInputStream(java.io.FileInputStream) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 12 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project beam by apache.

the class AvroPipelineTest method readGenericFile.

/**
 * Reads every record from the single output shard
 * ({@code outputDir + "-00000-of-00001"}) into a list.
 *
 * @return the records in the order the file reader yields them
 * @throws IOException if the shard cannot be opened or read
 */
private List<GenericRecord> readGenericFile() throws IOException {
    File shard = new File(outputDir + "-00000-of-00001");
    List<GenericRecord> result = Lists.newArrayList();
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(shard, new GenericDatumReader<GenericRecord>())) {
        // The reader is its own iterator, so it can be drained directly.
        while (reader.hasNext()) {
            result.add(reader.next());
        }
    }
    return result;
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 13 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.

the class ClientConfigUtil method readSingleClientConfigAvro.

/**
     * Parses a string that contains a single fat client config in avro
     * format and converts it to {@link Properties}.
     * <p>
     * The input is decoded with a {@link JsonDecoder} against
     * {@code CLIENT_CONFIG_AVRO_SCHEMA}, and every key/value pair of the
     * decoded map is copied into the result as strings.
     * <p>
     * This method is best-effort: if decoding or conversion fails, the stack
     * trace is printed and whatever was collected so far (possibly nothing)
     * is returned instead of propagating the exception.
     * 
     * @param configAvro Input string of avro format, that contains the config
     *        of a single fat client
     * @return Properties of single fat client config; empty or partial if the
     *         input could not be parsed
     */
@SuppressWarnings("unchecked")
public static Properties readSingleClientConfigAvro(String configAvro) {
    Properties props = new Properties();
    try {
        JsonDecoder decoder = new JsonDecoder(CLIENT_CONFIG_AVRO_SCHEMA, configAvro);
        GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>(CLIENT_CONFIG_AVRO_SCHEMA);
        Map<Utf8, Utf8> flowMap = (Map<Utf8, Utf8>) datumReader.read(null, decoder);
        // Iterate entries directly rather than looking each key up a second time.
        for (Map.Entry<Utf8, Utf8> entry : flowMap.entrySet()) {
            props.put(entry.getKey().toString(), entry.getValue().toString());
        }
    } catch (Exception e) {
        // Deliberately not rethrown: callers receive the properties gathered so far.
        e.printStackTrace();
    }
    return props;
}
Also used : JsonDecoder(org.apache.avro.io.JsonDecoder) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Utf8(org.apache.avro.util.Utf8) Properties(java.util.Properties) Map(java.util.Map)

Example 14 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project flink by apache.

the class AvroRecordInputFormatTest method testDeserializeToGenericType.

/**
	 * Checks that a GenericData.Record survives a round trip through the Flink serializer.
	 * Avro users normally work with classes (POJOs) generated from their schemas, but when
	 * no generated class is available a GenericData.Record can be used instead: an untyped
	 * key-value record that relies on a schema to validate its contents.
	 *
	 * Using GenericData.Record with Flink is not recommended; prefer generated POJOs.
	 */
@Test
public void testDeserializeToGenericType() throws IOException {
    DatumReader<GenericData.Record> reader = new GenericDatumReader<>(userSchema);
    try (FileReader<GenericData.Record> fileReader = DataFileReader.openReader(testFile, reader)) {
        // Populate the record from the test file instead of building it by hand.
        GenericData.Record original = new GenericData.Record(userSchema);
        fileReader.next(original);

        // Sanity-check what was read from disk.
        assertNotNull(original);
        assertEquals("name not equal", TEST_NAME, original.get("name").toString());
        assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), original.get("type_enum").toString());
        // The first record carries no value for this field.
        assertEquals(null, original.get("type_long_test"));

        // Build the serializer for the generic record type.
        TypeInformation<GenericData.Record> typeInfo = TypeExtractor.createTypeInfo(GenericData.Record.class);
        ExecutionConfig config = new ExecutionConfig();
        Assert.assertEquals(GenericTypeInfo.class, typeInfo.getClass());
        Serializers.recursivelyRegisterType(typeInfo.getTypeClass(), config, new HashSet<Class<?>>());
        TypeSerializer<GenericData.Record> serializer = typeInfo.createSerializer(config);

        // Exactly one default Kryo serializer is expected: the one for Schema.
        Assert.assertEquals(1, config.getDefaultKryoSerializerClasses().size());
        Assert.assertTrue(config.getDefaultKryoSerializerClasses().containsKey(Schema.class));
        Assert.assertTrue(config.getDefaultKryoSerializerClasses().get(Schema.class).equals(Serializers.AvroSchemaSerializer.class));

        // Serialize into an in-memory buffer ...
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (DataOutputViewStreamWrapper target = new DataOutputViewStreamWrapper(buffer)) {
            serializer.serialize(original, target);
        }

        // ... and read it back from the same bytes.
        GenericData.Record restored;
        byte[] serialized = buffer.toByteArray();
        try (DataInputViewStreamWrapper source = new DataInputViewStreamWrapper(new ByteArrayInputStream(serialized))) {
            restored = serializer.deserialize(source);
        }

        // The restored record must carry the same field values.
        assertNotNull(restored);
        assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), restored.get("type_enum").toString());
        assertEquals("name not equal", TEST_NAME, restored.get("name").toString());
        assertEquals(null, restored.get("type_long_test"));
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericData(org.apache.avro.generic.GenericData) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) DataOutputViewStreamWrapper(org.apache.flink.core.memory.DataOutputViewStreamWrapper) ByteArrayInputStream(java.io.ByteArrayInputStream) GenericRecord(org.apache.avro.generic.GenericRecord) Serializers(org.apache.flink.api.java.typeutils.runtime.kryo.Serializers) Test(org.junit.Test)

Example 15 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project hive by apache.

the class TestThatEvolvedSchemasActAsWeWant method resolvedSchemasShouldReturnReaderSchema.

/**
 * Writes one record with schema v0 to an in-memory Avro data file, reads it back with
 * schema v1 set as the expected (reader) schema, and checks that the resulting record
 * has the v1 default filled in and reports v1 as its own schema.
 *
 * @throws IOException if writing or reading the in-memory data file fails
 */
@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
    // Need to verify that when reading a datum with an updated reader schema
    // that the datum then returns the reader schema as its own, since we
    // depend on this behavior in order to avoid re-encoding the datum
    // in the serde.
    String v0 = "{\n" + "    \"namespace\": \"org.apache.hadoop.hive\",\n" + "    \"name\": \"SomeStuff\",\n" + "    \"type\": \"record\",\n" + "    \"fields\": [\n" + "        {\n" + "            \"name\":\"v0\",\n" + "            \"type\":\"string\"\n" + "        }\n" + "    ]\n" + "}";
    String v1 = "{\n" + "    \"namespace\": \"org.apache.hadoop.hive\",\n" + "    \"name\": \"SomeStuff\",\n" + "    \"type\": \"record\",\n" + "    \"fields\": [\n" + "        {\n" + "            \"name\":\"v0\",\n" + "            \"type\":\"string\"\n" + "        },\n" + "        {\n" + "            \"name\":\"v1\",\n" + "            \"type\":\"string\",\n" + "            \"default\":\"v1_default\"" + "        }\n" + "    ]\n" + "}";
    Schema[] schemas = { AvroSerdeUtils.getSchemaFor(v0), AvroSerdeUtils.getSchemaFor(v1) };
    // Encode a schema with v0, write out.
    GenericRecord record = new GenericData.Record(schemas[0]);
    record.put("v0", "v0 value");
    assertTrue(GenericData.get().validate(schemas[0], record));
    // Write datum out to a stream
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
        dfw.create(schemas[0], baos);
        dfw.append(record);
    } finally {
        // Always close the writer, even if create/append fails; close() also
        // flushes the buffered block so baos holds the complete file.
        dfw.close();
    }
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(schemas[1]);
    DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
    try {
        assertTrue(dfs.hasNext());
        GenericRecord next = dfs.next();
        assertEquals("v0 value", next.get("v0").toString());
        assertEquals("v1_default", next.get("v1").toString());
        // Now the most important check - when we query this record for its schema,
        // we should get back the latest, reader schema:
        assertEquals(schemas[1], next.getSchema());
    } finally {
        // Release the reader regardless of whether an assertion failed.
        dfs.close();
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) ByteArrayInputStream(java.io.ByteArrayInputStream) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Aggregations

GenericDatumReader (org.apache.avro.generic.GenericDatumReader)46 GenericRecord (org.apache.avro.generic.GenericRecord)31 Schema (org.apache.avro.Schema)20 IOException (java.io.IOException)15 File (java.io.File)10 DataFileStream (org.apache.avro.file.DataFileStream)10 Decoder (org.apache.avro.io.Decoder)8 ByteArrayInputStream (java.io.ByteArrayInputStream)7 GenericData (org.apache.avro.generic.GenericData)7 DataFileReader (org.apache.avro.file.DataFileReader)6 Test (org.junit.Test)6 ArrayList (java.util.ArrayList)5 JsonDecoder (org.apache.avro.io.JsonDecoder)5 ParseException (io.druid.java.util.common.parsers.ParseException)4 FileInputStream (java.io.FileInputStream)4 DataFileWriter (org.apache.avro.file.DataFileWriter)4 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 Map (java.util.Map)3 ChannelBufferInputStream (org.jboss.netty.buffer.ChannelBufferInputStream)3