Search in sources :

Example 1 with ReflectDatumReader

use of org.apache.avro.reflect.ReflectDatumReader in project beam by apache.

the class AvroIOTest method assertTestOutputs.

/**
 * Asserts that every expected shard file exists and that, taken together, the files contain
 * exactly the expected elements.
 *
 * <p>One file name per shard is built with {@code DefaultFilenamePolicy.constructName} using an
 * empty suffix. Each file must exist; its contents are read as strings and collected into a
 * single list, which is then compared with {@code expectedElements} without regard to order.
 *
 * @param expectedElements the elements the output files must contain, in any order
 * @param numShards the number of shard files to look for
 * @param outputFilePrefix the prefix passed to the file name policy
 * @param shardNameTemplate the shard name template passed to the file name policy
 * @throws IOException if an output file cannot be opened or read
 */
public static void assertTestOutputs(String[] expectedElements, int numShards, String outputFilePrefix, String shardNameTemplate) throws IOException {
    // Validate that the data written matches the expected elements (order is not checked)
    List<File> expectedFiles = new ArrayList<>();
    for (int i = 0; i < numShards; i++) {
        expectedFiles.add(new File(DefaultFilenamePolicy.constructName(outputFilePrefix, shardNameTemplate, "", /* no suffix */
        i, numShards)));
    }
    List<String> actualElements = new ArrayList<>();
    for (File outputFile : expectedFiles) {
        assertTrue("Expected output file " + outputFile.getName(), outputFile.exists());
        // Explicit type argument: the raw ReflectDatumReader forced an unchecked conversion here.
        try (DataFileReader<String> reader = new DataFileReader<>(outputFile, new ReflectDatumReader<String>(ReflectData.get().getSchema(String.class)))) {
            Iterators.addAll(actualElements, reader);
        }
    }
    assertThat(actualElements, containsInAnyOrder(expectedElements));
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) ArrayList(java.util.ArrayList) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader) File(java.io.File)

Example 2 with ReflectDatumReader

use of org.apache.avro.reflect.ReflectDatumReader in project flink by apache.

the class EncoderDecoderTest method testObjectSerialization.

/**
 * Round-trips {@code obj} through reflect-based Avro serialization and asserts that the value
 * read back equals the original.
 *
 * <p>The object is written with a {@code ReflectDatumWriter} via a {@code DataOutputEncoder}
 * into an in-memory buffer, then read back with a {@code ReflectDatumReader} via a
 * {@code DataInputDecoder}. Arrays are compared element-wise, everything else with
 * {@code assertEquals}. Any exception is printed and converted into a test failure.
 *
 * @param obj the object to serialize and deserialize
 */
private static <X> void testObjectSerialization(X obj) {
    try {
        @SuppressWarnings("unchecked") Class<X> type = (Class<X>) obj.getClass();

        // serialize into an in-memory buffer
        ByteArrayOutputStream buffer = new ByteArrayOutputStream(512);
        DataOutputStream out = new DataOutputStream(buffer);
        DataOutputEncoder encoder = new DataOutputEncoder();
        encoder.setOut(out);
        new ReflectDatumWriter<X>(type).write(obj, encoder);
        out.flush();
        out.close();
        byte[] serialized = buffer.toByteArray();

        // deserialize from the bytes just written
        DataInputDecoder decoder = new DataInputDecoder();
        decoder.setIn(new DataInputStream(new ByteArrayInputStream(serialized)));
        ReflectDatumReader<X> reader = new ReflectDatumReader<X>(type);
        // try to provide a reuse instance; if the type cannot be instantiated, read without one
        X reuse = null;
        try {
            reuse = type.newInstance();
        } catch (Throwable t) {
            // intentionally ignored: reuse stays null
        }
        X result = reader.read(reuse, decoder);

        // check
        final String message = "Deserialized object is not the same as the original";
        Class<?> actualType = obj.getClass();
        if (!actualType.isArray()) {
            assertEquals(message, obj, result);
        } else if (actualType == byte[].class) {
            assertArrayEquals(message, (byte[]) obj, (byte[]) result);
        } else if (actualType == short[].class) {
            assertArrayEquals(message, (short[]) obj, (short[]) result);
        } else if (actualType == int[].class) {
            assertArrayEquals(message, (int[]) obj, (int[]) result);
        } else if (actualType == long[].class) {
            assertArrayEquals(message, (long[]) obj, (long[]) result);
        } else if (actualType == char[].class) {
            assertArrayEquals(message, (char[]) obj, (char[]) result);
        } else if (actualType == float[].class) {
            assertArrayEquals(message, (float[]) obj, (float[]) result, 0.0f);
        } else if (actualType == double[].class) {
            assertArrayEquals(message, (double[]) obj, (double[]) result, 0.0);
        } else {
            assertArrayEquals(message, (Object[]) obj, (Object[]) result);
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test failed due to an exception: " + e.getMessage());
    }
}
Also used : DataOutputStream(java.io.DataOutputStream) ReflectDatumWriter(org.apache.avro.reflect.ReflectDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader)

Example 3 with ReflectDatumReader

use of org.apache.avro.reflect.ReflectDatumReader in project flink by apache.

the class AvroInputFormat method initReader.

/**
 * Opens an Avro {@code DataFileReader} over the file that the given split belongs to.
 *
 * <p>The datum reader is picked from {@code avroValueType}: a {@code GenericDatumReader} when it
 * is exactly {@code GenericRecord}, a {@code SpecificDatumReader} when it is assignable to
 * {@code SpecificRecordBase}, and a {@code ReflectDatumReader} otherwise. After the reader is
 * opened, {@code end} is set to the split's start plus length and
 * {@code recordsReadSinceLastSync} is reset to zero.
 *
 * @param split the input split to open
 * @return the opened data file reader
 * @throws IOException if the file status cannot be obtained or the reader cannot be opened
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    final DatumReader<E> datumReader;
    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
        datumReader = new SpecificDatumReader<E>(avroValueType);
    } else {
        datumReader = new ReflectDatumReader<E>(avroValueType);
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }

    // The wrapper is given the total length of the file, not just of this split.
    final long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
    SeekableInput in = new FSDataInputStreamWrapper(stream, fileLength);
    DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }

    recordsReadSinceLastSync = 0;
    end = split.getStart() + split.getLength();
    return dataFileReader;
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) SeekableInput(org.apache.avro.file.SeekableInput) FSDataInputStreamWrapper(org.apache.flink.api.avro.FSDataInputStreamWrapper) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader)

Example 4 with ReflectDatumReader

use of org.apache.avro.reflect.ReflectDatumReader in project cdap by caskdata.

the class AvroStreamBodyConsumerTest method generateFile.

/**
 * Generates an Avro file and returns a {@code ContentInfo} for it whose {@code verify} method
 * checks the records of supplied content.
 *
 * <p>The file is produced by {@code generateAvroFile} on a new file from {@code TMP_FOLDER}.
 * Verification decodes {@code recordCount} records using the schema found under the
 * {@code "schema"} header and requires the {@code i}-th record to have {@code id == i} and
 * {@code name} equal to {@code "Record number " + i}.
 *
 * @param recordCount the number of records to generate and later verify
 * @return the content info wrapping the generated file
 * @throws IOException if the temporary file cannot be created or the Avro file cannot be written
 */
@Override
protected ContentInfo generateFile(final int recordCount) throws IOException {
    return new FileContentInfo(generateAvroFile(TMP_FOLDER.newFile(), recordCount)) {

        @Override
        public boolean verify(Map<String, String> headers, InputSupplier<? extends InputStream> contentSupplier) throws IOException {
            // Deserialize and verify the records. The stream obtained from the supplier is
            // owned by this method, so it is closed here on every exit path.
            try (InputStream input = contentSupplier.getInput()) {
                Decoder decoder = DecoderFactory.get().binaryDecoder(input, null);
                DatumReader<Record> reader = new ReflectDatumReader<>(Record.class);
                reader.setSchema(new Schema.Parser().parse(headers.get("schema")));
                for (int i = 0; i < recordCount; i++) {
                    Record record = reader.read(null, decoder);
                    if (i != record.id) {
                        return false;
                    }
                    if (!("Record number " + i).equals(record.name)) {
                        return false;
                    }
                }
                return true;
            }
        }
    };
}
Also used : InputStream(java.io.InputStream) Decoder(org.apache.avro.io.Decoder) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader) Map(java.util.Map) InputSupplier(com.google.common.io.InputSupplier)

Example 5 with ReflectDatumReader

use of org.apache.avro.reflect.ReflectDatumReader in project flink by apache.

the class EncoderDecoderTest method testObjectSerialization.

/**
 * Serializes {@code obj} with a {@code ReflectDatumWriter}, deserializes the bytes with a
 * {@code ReflectDatumReader}, and asserts that the result equals the original.
 *
 * <p>Writing goes through a {@code DataOutputEncoder} into an in-memory buffer; reading goes
 * through a {@code DataInputDecoder} over those bytes. Arrays are compared element-wise,
 * all other values with {@code assertEquals}. Any exception is printed and turned into a
 * test failure.
 *
 * @param obj the object to round-trip
 */
private static <X> void testObjectSerialization(X obj) {
    try {
        @SuppressWarnings("unchecked") Class<X> type = (Class<X>) obj.getClass();

        // serialize
        ByteArrayOutputStream sink = new ByteArrayOutputStream(512);
        DataOutputStream dataOut = new DataOutputStream(sink);
        DataOutputEncoder encoder = new DataOutputEncoder();
        encoder.setOut(dataOut);
        ReflectDatumWriter<X> writer = new ReflectDatumWriter<>(type);
        writer.write(obj, encoder);
        dataOut.flush();
        dataOut.close();
        byte[] payload = sink.toByteArray();

        // deserialize
        DataInputStream dataIn = new DataInputStream(new ByteArrayInputStream(payload));
        DataInputDecoder decoder = new DataInputDecoder();
        decoder.setIn(dataIn);
        ReflectDatumReader<X> reader = new ReflectDatumReader<>(type);
        // create a reuse object if possible, otherwise we have no reuse object
        X reuse = null;
        try {
            reuse = type.newInstance();
        } catch (Throwable t) {
            // do nothing
        }
        final X result = reader.read(reuse, decoder);

        // check
        final String message = "Deserialized object is not the same as the original";
        final Class<?> actualType = obj.getClass();
        if (!actualType.isArray()) {
            assertEquals(message, obj, result);
        } else if (actualType == byte[].class) {
            assertArrayEquals(message, (byte[]) obj, (byte[]) result);
        } else if (actualType == short[].class) {
            assertArrayEquals(message, (short[]) obj, (short[]) result);
        } else if (actualType == int[].class) {
            assertArrayEquals(message, (int[]) obj, (int[]) result);
        } else if (actualType == long[].class) {
            assertArrayEquals(message, (long[]) obj, (long[]) result);
        } else if (actualType == char[].class) {
            assertArrayEquals(message, (char[]) obj, (char[]) result);
        } else if (actualType == float[].class) {
            assertArrayEquals(message, (float[]) obj, (float[]) result, 0.0f);
        } else if (actualType == double[].class) {
            assertArrayEquals(message, (double[]) obj, (double[]) result, 0.0);
        } else {
            assertArrayEquals(message, (Object[]) obj, (Object[]) result);
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test failed due to an exception: " + e.getMessage());
    }
}
Also used : DataOutputEncoder(org.apache.flink.formats.avro.utils.DataOutputEncoder) DataOutputStream(java.io.DataOutputStream) ReflectDatumWriter(org.apache.avro.reflect.ReflectDatumWriter) DataInputDecoder(org.apache.flink.formats.avro.utils.DataInputDecoder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader)

Aggregations

ReflectDatumReader (org.apache.avro.reflect.ReflectDatumReader)12 DataFileReader (org.apache.avro.file.DataFileReader)5 ByteArrayInputStream (java.io.ByteArrayInputStream)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 ReflectDatumWriter (org.apache.avro.reflect.ReflectDatumWriter)4 SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader)4 File (java.io.File)3 ArrayList (java.util.ArrayList)3 Schema (org.apache.avro.Schema)3 DataInputStream (java.io.DataInputStream)2 DataOutputStream (java.io.DataOutputStream)2 SeekableInput (org.apache.avro.file.SeekableInput)2 InputSupplier (com.google.common.io.InputSupplier)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Map (java.util.Map)1 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)1 BinaryDecoder (org.apache.avro.io.BinaryDecoder)1 BinaryEncoder (org.apache.avro.io.BinaryEncoder)1 Decoder (org.apache.avro.io.Decoder)1