
Example 26 with GenericRecordBuilder

Use of org.apache.avro.generic.GenericRecordBuilder in the apache/parquet-mr project.

From class TestReadWrite, method testNestedLists.

@Test
public void testNestedLists() throws Exception {
    Schema schema = new Schema.Parser().parse(Resources.getResource("nested_array.avsc").openStream());
    Path file = new Path(createTempFile().getPath());
    // Write one record containing nested lists with the Avro Parquet writer
    ParquetWriter<GenericRecord> parquetWriter = AvroParquetWriter.<GenericRecord>builder(file).withSchema(schema).withConf(testConf).build();
    Schema innerRecordSchema = schema.getField("l1").schema().getTypes().get(1).getElementType().getTypes().get(1);
    GenericRecord record = new GenericRecordBuilder(schema).set("l1", Collections.singletonList(new GenericRecordBuilder(innerRecordSchema).set("l2", Collections.singletonList("hello")).build())).build();
    parquetWriter.write(record);
    parquetWriter.close();
    try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<>(testConf, file)) {
        GenericRecord nextRecord = reader.read();
        assertNotNull(nextRecord);
        assertNotNull(nextRecord.get("l1"));
        List<?> l1List = (List<?>) nextRecord.get("l1");
        assertNotNull(l1List.get(0));
        List<?> l2List = (List<?>) ((GenericRecord) l1List.get(0)).get("l2");
        assertEquals(str("hello"), l2List.get(0));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) ArrayList(java.util.ArrayList) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)
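
The test loads nested_array.avsc from the classpath, so the schema itself is not shown above. The field accesses imply that l1 is a nullable array whose elements are a nullable record carrying a string-array field l2. A hypothetical reconstruction along those lines (the actual resource in parquet-mr may differ in names and nullability):

// Plausible shape for nested_array.avsc; record names other than the
// fields "l1" and "l2" are illustrative, not taken from the real resource.
Schema schema = new Schema.Parser().parse(
    "{\"type\": \"record\", \"name\": \"NestedArray\", \"fields\": ["
    + "  {\"name\": \"l1\", \"type\": [\"null\", {\"type\": \"array\", \"items\":"
    + "    [\"null\", {\"type\": \"record\", \"name\": \"InnerRecord\", \"fields\": ["
    + "      {\"name\": \"l2\", \"type\": {\"type\": \"array\", \"items\": \"string\"}}"
    + "    ]}]}]}"
    + "]}");

With this shape, schema.getField("l1").schema().getTypes().get(1) is the array branch of the union, and the second branch of its element type is the inner record, matching how innerRecordSchema is extracted in the test.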

Example 27 with GenericRecordBuilder

Use of org.apache.avro.generic.GenericRecordBuilder in the apache/parquet-mr project.

From class TestReadWrite, method testDecimalValues.

@Test
public void testDecimalValues() throws Exception {
    Schema decimalSchema = Schema.createRecord("myrecord", null, null, false);
    Schema decimal = LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES));
    decimalSchema.setFields(Collections.singletonList(new Schema.Field("dec", decimal, null, null)));
    // add the decimal conversion to a generic data model
    GenericData decimalSupport = new GenericData();
    decimalSupport.addLogicalTypeConversion(new Conversions.DecimalConversion());
    File file = temp.newFile("decimal.parquet");
    file.delete();
    Path path = new Path(file.toString());
    List<GenericRecord> expected = Lists.newArrayList();
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(path).withDataModel(decimalSupport).withSchema(decimalSchema).build()) {
        Random random = new Random(34L);
        GenericRecordBuilder builder = new GenericRecordBuilder(decimalSchema);
        for (int i = 0; i < 1000; i += 1) {
            // Generating Integers between -(2^29) and (2^29 - 1) to ensure the number of digits <= 9
            BigDecimal dec = new BigDecimal(new BigInteger(30, random).subtract(BigInteger.valueOf(1L << 29)), 2);
            builder.set("dec", dec);
            GenericRecord rec = builder.build();
            expected.add(rec);
            writer.write(builder.build());
        }
    }
    List<GenericRecord> records = Lists.newArrayList();
    try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(path).withDataModel(decimalSupport).disableCompatibility().build()) {
        GenericRecord rec;
        while ((rec = reader.read()) != null) {
            records.add(rec);
        }
    }
    Assert.assertTrue("dec field should be a BigDecimal instance", records.get(0).get("dec") instanceof BigDecimal);
    Assert.assertEquals("Content should match", expected, records);
}
Also used : Path(org.apache.hadoop.fs.Path) Schema(org.apache.avro.Schema) GenericData(org.apache.avro.generic.GenericData) BigDecimal(java.math.BigDecimal) Conversions(org.apache.avro.Conversions) Random(java.util.Random) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) BigInteger(java.math.BigInteger) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)
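
The DecimalConversion registered on the GenericData model is what translates between BigDecimal and the underlying bytes representation; the writer and reader invoke it internally. A minimal standalone round trip through the conversion, for illustration only (not part of the original test):

// Encode and decode one decimal(9, 2) value with Avro's DecimalConversion.
Schema bytesDecimal = LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES));
Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
ByteBuffer encoded = conversion.toBytes(new BigDecimal("12.34"), bytesDecimal, bytesDecimal.getLogicalType());
BigDecimal decoded = conversion.fromBytes(encoded, bytesDecimal, bytesDecimal.getLogicalType());
// decoded equals new BigDecimal("12.34"): unscaled value 1234 at scale 2.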

Example 28 with GenericRecordBuilder

Use of org.apache.avro.generic.GenericRecordBuilder in the apache/parquet-mr project.

From class TestReadWriteOldListBehavior, method testMapWithUtf8Key.

@Test
public void testMapWithUtf8Key() throws Exception {
    Schema schema = new Schema.Parser().parse(Resources.getResource("map.avsc").openStream());
    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());
    try (AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema)) {
        // Write a record with a map with Utf8 keys.
        GenericData.Record record = new GenericRecordBuilder(schema).set("mymap", ImmutableMap.of(new Utf8("a"), 1, new Utf8("b"), 2)).build();
        writer.write(record);
    }
    try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<>(testConf, file)) {
        GenericRecord nextRecord = reader.read();
        assertNotNull(nextRecord);
        assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Schema(org.apache.avro.Schema) GenericData(org.apache.avro.generic.GenericData) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)
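
The str(...) helper used in the assertions here and in Example 26 is defined in the surrounding test class rather than in the excerpt; it normalizes a string literal to the CharSequence type the reader is expected to return, since Avro's generic readers typically produce org.apache.avro.util.Utf8 rather than java.lang.String. A hypothetical stand-in, assuming Utf8 is the expected type:

// Hypothetical stand-in for the test class's str(...) helper; the real
// helper in parquet-mr chooses between String and Utf8 depending on the
// compatibility mode the test runs under.
private static CharSequence str(String value) {
    return new Utf8(value);
}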

Example 29 with GenericRecordBuilder

Use of org.apache.avro.generic.GenericRecordBuilder in the apache/parquet-mr project.

From class TestReadWriteOldListBehavior, method testAllUsingDefaultAvroSchema.

@Test
public void testAllUsingDefaultAvroSchema() throws Exception {
    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());
    // write file using Parquet APIs
    try (ParquetWriter<Map<String, Object>> parquetWriter = new ParquetWriter<Map<String, Object>>(file, new WriteSupport<Map<String, Object>>() {

        private RecordConsumer recordConsumer;

        @Override
        public WriteContext init(Configuration configuration) {
            return new WriteContext(MessageTypeParser.parseMessageType(TestAvroSchemaConverter.ALL_PARQUET_SCHEMA), new HashMap<String, String>());
        }

        @Override
        public void prepareForWrite(RecordConsumer recordConsumer) {
            this.recordConsumer = recordConsumer;
        }

        @Override
        public void write(Map<String, Object> record) {
            recordConsumer.startMessage();
            int index = 0;
            recordConsumer.startField("myboolean", index);
            recordConsumer.addBoolean((Boolean) record.get("myboolean"));
            recordConsumer.endField("myboolean", index++);
            recordConsumer.startField("myint", index);
            recordConsumer.addInteger((Integer) record.get("myint"));
            recordConsumer.endField("myint", index++);
            recordConsumer.startField("mylong", index);
            recordConsumer.addLong((Long) record.get("mylong"));
            recordConsumer.endField("mylong", index++);
            recordConsumer.startField("myfloat", index);
            recordConsumer.addFloat((Float) record.get("myfloat"));
            recordConsumer.endField("myfloat", index++);
            recordConsumer.startField("mydouble", index);
            recordConsumer.addDouble((Double) record.get("mydouble"));
            recordConsumer.endField("mydouble", index++);
            recordConsumer.startField("mybytes", index);
            recordConsumer.addBinary(Binary.fromReusedByteBuffer((ByteBuffer) record.get("mybytes")));
            recordConsumer.endField("mybytes", index++);
            recordConsumer.startField("mystring", index);
            recordConsumer.addBinary(Binary.fromString((String) record.get("mystring")));
            recordConsumer.endField("mystring", index++);
            recordConsumer.startField("mynestedrecord", index);
            recordConsumer.startGroup();
            recordConsumer.startField("mynestedint", 0);
            recordConsumer.addInteger((Integer) record.get("mynestedint"));
            recordConsumer.endField("mynestedint", 0);
            recordConsumer.endGroup();
            recordConsumer.endField("mynestedrecord", index++);
            recordConsumer.startField("myenum", index);
            recordConsumer.addBinary(Binary.fromString((String) record.get("myenum")));
            recordConsumer.endField("myenum", index++);
            recordConsumer.startField("myarray", index);
            recordConsumer.startGroup();
            recordConsumer.startField("array", 0);
            for (int val : (int[]) record.get("myarray")) {
                recordConsumer.addInteger(val);
            }
            recordConsumer.endField("array", 0);
            recordConsumer.endGroup();
            recordConsumer.endField("myarray", index++);
            recordConsumer.startField("myoptionalarray", index);
            recordConsumer.startGroup();
            recordConsumer.startField("array", 0);
            for (int val : (int[]) record.get("myoptionalarray")) {
                recordConsumer.addInteger(val);
            }
            recordConsumer.endField("array", 0);
            recordConsumer.endGroup();
            recordConsumer.endField("myoptionalarray", index++);
            recordConsumer.startField("myarrayofoptional", index);
            recordConsumer.startGroup();
            recordConsumer.startField("list", 0);
            for (Integer val : (Integer[]) record.get("myarrayofoptional")) {
                recordConsumer.startGroup();
                if (val != null) {
                    recordConsumer.startField("element", 0);
                    recordConsumer.addInteger(val);
                    recordConsumer.endField("element", 0);
                }
                recordConsumer.endGroup();
            }
            recordConsumer.endField("list", 0);
            recordConsumer.endGroup();
            recordConsumer.endField("myarrayofoptional", index++);
            recordConsumer.startField("myrecordarray", index);
            recordConsumer.startGroup();
            recordConsumer.startField("array", 0);
            recordConsumer.startGroup();
            recordConsumer.startField("a", 0);
            for (int val : (int[]) record.get("myrecordarraya")) {
                recordConsumer.addInteger(val);
            }
            recordConsumer.endField("a", 0);
            recordConsumer.startField("b", 1);
            for (int val : (int[]) record.get("myrecordarrayb")) {
                recordConsumer.addInteger(val);
            }
            recordConsumer.endField("b", 1);
            recordConsumer.endGroup();
            recordConsumer.endField("array", 0);
            recordConsumer.endGroup();
            recordConsumer.endField("myrecordarray", index++);
            recordConsumer.startField("mymap", index);
            recordConsumer.startGroup();
            recordConsumer.startField("map", 0);
            recordConsumer.startGroup();
            Map<String, Integer> mymap = (Map<String, Integer>) record.get("mymap");
            recordConsumer.startField("key", 0);
            for (String key : mymap.keySet()) {
                recordConsumer.addBinary(Binary.fromString(key));
            }
            recordConsumer.endField("key", 0);
            recordConsumer.startField("value", 1);
            for (int val : mymap.values()) {
                recordConsumer.addInteger(val);
            }
            recordConsumer.endField("value", 1);
            recordConsumer.endGroup();
            recordConsumer.endField("map", 0);
            recordConsumer.endGroup();
            recordConsumer.endField("mymap", index++);
            recordConsumer.startField("myfixed", index);
            recordConsumer.addBinary(Binary.fromReusedByteArray((byte[]) record.get("myfixed")));
            recordConsumer.endField("myfixed", index++);
            recordConsumer.endMessage();
        }
    })) {
        Map<String, Object> record = new HashMap<String, Object>();
        record.put("myboolean", true);
        record.put("myint", 1);
        record.put("mylong", 2L);
        record.put("myfloat", 3.1f);
        record.put("mydouble", 4.1);
        record.put("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
        record.put("mystring", "hello");
        record.put("myenum", "a");
        record.put("mynestedint", 1);
        record.put("myarray", new int[] { 1, 2, 3 });
        record.put("myoptionalarray", new int[] { 1, 2, 3 });
        record.put("myarrayofoptional", new Integer[] { 1, null, 2, null, 3 });
        record.put("myrecordarraya", new int[] { 1, 2, 3 });
        record.put("myrecordarrayb", new int[] { 4, 5, 6 });
        record.put("mymap", ImmutableMap.of("a", 1, "b", 2));
        record.put("myfixed", new byte[] { (byte) 65 });
        parquetWriter.write(record);
    }
    Schema nestedRecordSchema = Schema.createRecord("mynestedrecord", null, null, false);
    nestedRecordSchema.setFields(Arrays.asList(new Schema.Field("mynestedint", Schema.create(Schema.Type.INT), null, null)));
    GenericData.Record nestedRecord = new GenericRecordBuilder(nestedRecordSchema).set("mynestedint", 1).build();
    List<Integer> integerArray = Arrays.asList(1, 2, 3);
    Schema recordArraySchema = Schema.createRecord("array", null, null, false);
    recordArraySchema.setFields(Arrays.asList(new Schema.Field("a", Schema.create(Schema.Type.INT), null, null), new Schema.Field("b", Schema.create(Schema.Type.INT), null, null)));
    GenericRecordBuilder builder = new GenericRecordBuilder(recordArraySchema);
    List<GenericData.Record> recordArray = new ArrayList<GenericData.Record>();
    recordArray.add(builder.set("a", 1).set("b", 4).build());
    recordArray.add(builder.set("a", 2).set("b", 5).build());
    recordArray.add(builder.set("a", 3).set("b", 6).build());
    GenericData.Array<GenericData.Record> genericRecordArray = new GenericData.Array<GenericData.Record>(Schema.createArray(recordArraySchema), recordArray);
    GenericFixed genericFixed = new GenericData.Fixed(Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });
    // 3-level lists are deserialized with the extra layer present
    Schema elementSchema = record("list", optionalField("element", primitive(Schema.Type.INT)));
    GenericRecordBuilder elementBuilder = new GenericRecordBuilder(elementSchema);
    GenericData.Array<GenericData.Record> genericRecordArrayWithNullIntegers = new GenericData.Array<GenericData.Record>(array(elementSchema), Arrays.asList(elementBuilder.set("element", 1).build(), elementBuilder.set("element", null).build(), elementBuilder.set("element", 2).build(), elementBuilder.set("element", null).build(), elementBuilder.set("element", 3).build()));
    try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<>(testConf, file)) {
        GenericRecord nextRecord = reader.read();
        assertNotNull(nextRecord);
        assertEquals(true, nextRecord.get("myboolean"));
        assertEquals(1, nextRecord.get("myint"));
        assertEquals(2L, nextRecord.get("mylong"));
        assertEquals(3.1f, nextRecord.get("myfloat"));
        assertEquals(4.1, nextRecord.get("mydouble"));
        assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
        assertEquals(str("hello"), nextRecord.get("mystring"));
        assertEquals(str("a"), nextRecord.get("myenum"));
        assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
        assertEquals(integerArray, nextRecord.get("myarray"));
        assertEquals(integerArray, nextRecord.get("myoptionalarray"));
        assertEquals(genericRecordArrayWithNullIntegers, nextRecord.get("myarrayofoptional"));
        assertEquals(genericRecordArray, nextRecord.get("myrecordarray"));
        assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
        assertEquals(genericFixed, nextRecord.get("myfixed"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) AvroTestUtil.optionalField(org.apache.parquet.avro.AvroTestUtil.optionalField) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) GenericFixed(org.apache.avro.generic.GenericFixed) GenericData(org.apache.avro.generic.GenericData) File(java.io.File) Test(org.junit.Test)
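
The "3-level lists" comment in the test refers to the standard Parquet LIST encoding, in which each element is wrapped in an intermediate repeated group. Under the old list behavior the Avro reader keeps that wrapper visible, which is why the expected value for myarrayofoptional is built from list/element records rather than plain integers. A sketch of roughly the layout involved (the authoritative definition is TestAvroSchemaConverter.ALL_PARQUET_SCHEMA, which may differ in repetition details):

// Approximate Parquet layout of the myarrayofoptional field (3-level LIST).
MessageType listLayout = MessageTypeParser.parseMessageType(
    "message example {\n"
    + "  optional group myarrayofoptional (LIST) {\n"
    + "    repeated group list {\n"
    + "      optional int32 element;\n"
    + "    }\n"
    + "  }\n"
    + "}");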

Example 30 with GenericRecordBuilder

Use of org.apache.avro.generic.GenericRecordBuilder in the apache/parquet-mr project.

From class TestReadWriteOldListBehavior, method testMapWithNulls.

@Test
public void testMapWithNulls() throws Exception {
    Schema schema = new Schema.Parser().parse(Resources.getResource("map_with_nulls.avsc").openStream());
    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());
    Map<CharSequence, Integer> map = new HashMap<>();
    try (AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema)) {
        // Write a record with a map containing a null value
        map.put(str("thirty-four"), 34);
        map.put(str("eleventy-one"), null);
        map.put(str("one-hundred"), 100);
        GenericData.Record record = new GenericRecordBuilder(schema).set("mymap", map).build();
        writer.write(record);
    }
    try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<>(testConf, file)) {
        GenericRecord nextRecord = reader.read();
        assertNotNull(nextRecord);
        assertEquals(map, nextRecord.get("mymap"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) GenericData(org.apache.avro.generic.GenericData) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)
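
As with the other resource-backed tests, map_with_nulls.avsc is not shown in the excerpt. For the null entry written above to be legal, the map's value type must be a union that includes null. A plausible reconstruction (the actual resource may differ in record naming):

// Hypothetical reconstruction of map_with_nulls.avsc; the ["null", "int"]
// value union is what permits the null map entry written in the test.
Schema schema = new Schema.Parser().parse(
    "{\"type\": \"record\", \"name\": \"MapWithNulls\", \"fields\": ["
    + "  {\"name\": \"mymap\", \"type\": {\"type\": \"map\", \"values\": [\"null\", \"int\"]}}"
    + "]}");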

Aggregations

GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 368
GenericRecord (org.apache.avro.generic.GenericRecord): 254
Test (org.junit.Test): 242
Schema (org.apache.avro.Schema): 199
GenericData (org.apache.avro.generic.GenericData): 60
ArrayList (java.util.ArrayList): 44
EnumTest (foo.bar.EnumTest): 41
Schema (org.apache.kafka.connect.data.Schema): 41
IndexedRecord (org.apache.avro.generic.IndexedRecord): 39
File (java.io.File): 37
SchemaAndValue (org.apache.kafka.connect.data.SchemaAndValue): 35
Path (org.apache.hadoop.fs.Path): 33
Struct (org.apache.kafka.connect.data.Struct): 30
AvroSchema (io.confluent.kafka.schemaregistry.avro.AvroSchema): 28
List (java.util.List): 28
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 26
ByteBuffer (java.nio.ByteBuffer): 26
HashMap (java.util.HashMap): 25
Record (org.apache.avro.generic.GenericData.Record): 25
Test (org.testng.annotations.Test): 21