Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.
Class TestMapStructures, method testStringMapOfOptionalIntArray.
@Test
public void testStringMapOfOptionalIntArray() throws Exception {
  // tests a multimap structure for PARQUET-26
  Path test = writeDirect("StringMapOfOptionalIntArray",
      Types.buildMessage()
          .optionalGroup().as(MAP)
              .repeatedGroup()
                  .required(BINARY).as(UTF8).named("key")
                  .optionalGroup().as(LIST)
                      .repeatedGroup()
                          .optional(INT32).named("element")
                      .named("list")
                  .named("value")
              .named("key_value")
          .named("examples")
          .named("StringMapOfOptionalIntArray"),
      new TestArrayCompatibility.DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("examples", 0);
          rc.startGroup();
          rc.startField("key_value", 0);
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("low"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("list", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(34);
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(35);
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          // adds a null element
          rc.endGroup();
          rc.endField("list", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("high"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("list", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(340);
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(360);
          rc.endField("element", 0);
          rc.endGroup();
          rc.endField("list", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();
          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("examples", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(
      record(new Text("low"), record(new IntWritable(34), new IntWritable(35), null)),
      record(new Text("high"), record(new IntWritable(340), new IntWritable(360))));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
  deserialize(records.get(0),
      Arrays.asList("examples"),
      Arrays.asList("map<string,array<int>>"));
}
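For readers who find the fluent Types builder hard to follow, the same schema can be written out as Parquet message-type text and parsed with MessageTypeParser. This is an illustrative sketch, not part of the Hive test; the class name is invented and the text is my transcription of the builder chain above.

// Sketch only: message-type form of the schema built in testStringMapOfOptionalIntArray.
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class StringMapOfOptionalIntArraySchemaSketch {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message StringMapOfOptionalIntArray {\n" +
        "  optional group examples (MAP) {\n" +
        "    repeated group key_value {\n" +
        "      required binary key (UTF8);\n" +
        "      optional group value (LIST) {\n" +
        "        repeated group list {\n" +
        "          optional int32 element;\n" +
        "        }\n" +
        "      }\n" +
        "    }\n" +
        "  }\n" +
        "}");
    // Printing the parsed schema shows the same nested MAP/LIST structure
    // that the builder chain in the test constructs.
    System.out.println(schema);
  }
}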
Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.
Class TestMapStructures, method testNestedMap.
@Test
public void testNestedMap() throws Exception {
  Path test = writeDirect("DoubleMapWithStructValue",
      Types.buildMessage()
          .optionalGroup().as(MAP)
              .repeatedGroup()
                  .optional(BINARY).as(UTF8).named("key")
                  .optionalGroup().as(MAP)
                      .repeatedGroup()
                          .optional(BINARY).as(UTF8).named("key")
                          .required(INT32).named("value")
                      .named("key_value")
                  .named("value")
              .named("key_value")
          .named("map_of_maps")
          .named("NestedMap"),
      new TestArrayCompatibility.DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("map_of_maps", 0);
          rc.startGroup();
          rc.startField("key_value", 0);
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("a"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("key_value", 0);
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("b"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.addInteger(1);
          rc.endField("value", 1);
          rc.endGroup();
          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("b"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("key_value", 0);
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("a"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.addInteger(-1);
          rc.endField("value", 1);
          rc.endGroup();
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("b"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.addInteger(-2);
          rc.endField("value", 1);
          rc.endGroup();
          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();
          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("map_of_maps", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(
      record(new Text("a"),
          record(record(new Text("b"), new IntWritable(1)))),
      record(new Text("b"),
          record(record(new Text("a"), new IntWritable(-1)),
              record(new Text("b"), new IntWritable(-2)))));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
  deserialize(records.get(0),
      Arrays.asList("map_of_maps"),
      Arrays.asList("map<string,map<string,int>>"));
}
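The expected value above nests the test's record(...) and list(...) helpers. As a rough sketch of the Writable shape this implies for the second entry ("b" -> {"a": -1, "b": -2}), one could build it directly from Hadoop's Writable classes; the helpers may assemble the arrays slightly differently, so this is an assumption about the layout, not the test's own code.

// Hypothetical sketch of the Writable nesting for the "b" entry of the nested map.
import java.util.Arrays;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class NestedMapWritableSketch {
  public static void main(String[] args) {
    // each inner map entry is a two-field struct: (key, value)
    ArrayWritable entryA = new ArrayWritable(Writable.class,
        new Writable[] { new Text("a"), new IntWritable(-1) });
    ArrayWritable entryB = new ArrayWritable(Writable.class,
        new Writable[] { new Text("b"), new IntWritable(-2) });
    // the inner map is the collection of its entries
    ArrayWritable innerMap = new ArrayWritable(Writable.class,
        new Writable[] { entryA, entryB });
    // the outer entry pairs the key "b" with the inner map
    ArrayWritable outerEntry = new ArrayWritable(Writable.class,
        new Writable[] { new Text("b"), innerMap });
    System.out.println(Arrays.toString(outerEntry.get()));
  }
}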
Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.
Class TestMapStructures, method testStringMapOfOptionalArray.
@Test
public void testStringMapOfOptionalArray() throws Exception {
  // tests a multimap structure
  Path test = writeDirect("StringMapOfOptionalArray",
      Types.buildMessage()
          .optionalGroup().as(MAP)
              .repeatedGroup()
                  .required(BINARY).as(UTF8).named("key")
                  .optionalGroup().as(LIST)
                      .repeatedGroup()
                          .optional(BINARY).as(UTF8).named("element")
                      .named("list")
                  .named("value")
              .named("key_value")
          .named("examples")
          .named("StringMapOfOptionalArray"),
      new TestArrayCompatibility.DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("examples", 0);
          rc.startGroup();
          rc.startField("key_value", 0);
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("green"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("list", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.addBinary(Binary.fromString("lettuce"));
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("element", 0);
          rc.addBinary(Binary.fromString("kale"));
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          // adds a null element
          rc.endGroup();
          rc.endField("list", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("brown"));
          rc.endField("key", 0);
          // no values array
          rc.endGroup();
          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("examples", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(
      record(new Text("green"), record(new Text("lettuce"), new Text("kale"), null)),
      record(new Text("brown"), null));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
  deserialize(records.get(0),
      Arrays.asList("examples"),
      Arrays.asList("map<string,array<string>>"));
}
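The deserialize(...) call above pairs the column name with the Hive type string map<string,array<string>>. As a small side sketch (using Hive's serde2 type utilities directly, not the test helper, and assuming nothing about what the helper does internally), that type string can be parsed into a TypeInfo tree:

// Sketch: parsing the Hive type string used by the test into a TypeInfo.
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MapTypeStringSketch {
  public static void main(String[] args) {
    TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<string>>");
    MapTypeInfo mapType = (MapTypeInfo) type;
    System.out.println(mapType.getMapKeyTypeInfo());   // string
    System.out.println(mapType.getMapValueTypeInfo()); // array<string>
  }
}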
Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.
Class TestArrayCompatibility, method testAmbiguousSingleFieldGroupInList.
@Test
public void testAmbiguousSingleFieldGroupInList() throws Exception {
  // this tests the case where older data has an ambiguous list and the
  // repeated group is not named in a way indicating that the source
  // considered it significant
  Path test = writeDirect("SingleFieldGroupInList",
      Types.buildMessage()
          .optionalGroup().as(LIST)
              .repeatedGroup()
                  .required(INT64).named("count")
              .named("single_element_group")
          .named("single_element_groups")
          .named("SingleFieldGroupInList"),
      new DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("single_element_groups", 0);
          rc.startGroup();
          // start writing array contents
          rc.startField("single_element_group", 0);
          rc.startGroup();
          rc.startField("count", 0);
          rc.addLong(1234L);
          rc.endField("count", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("count", 0);
          rc.addLong(2345L);
          rc.endField("count", 0);
          rc.endGroup();
          // finished writing array contents
          rc.endField("single_element_group", 0);
          rc.endGroup();
          rc.endField("single_element_groups", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(new LongWritable(1234L), new LongWritable(2345L));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
}
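The ambiguity here is whether the repeated single_element_group is itself the list element (which would read as array<struct<count:bigint>>) or only a synthetic wrapper around the real element (array<bigint>); the expected LongWritable values show this test resolves it as array<bigint>. The sketch below is my transcription of the builder chain into message-type text, not part of the test, for reference alongside the previous schema sketch.

// Sketch only: message-type form of the SingleFieldGroupInList schema.
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class SingleFieldGroupInListSchemaSketch {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message SingleFieldGroupInList {\n" +
        "  optional group single_element_groups (LIST) {\n" +
        "    repeated group single_element_group {\n" +
        "      required int64 count;\n" +
        "    }\n" +
        "  }\n" +
        "}");
    // Nothing in the names marks single_element_group as a real struct,
    // so the reader in this test treats it as the synthetic repeated layer.
    System.out.println(schema);
  }
}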
Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.
Class TestArrayCompatibility, method testMultiFieldGroupInList.
@Test
public void testMultiFieldGroupInList() throws Exception {
  // tests the missing element layer, detected by a multi-field group
  Path test = writeDirect("MultiFieldGroupInList",
      Types.buildMessage()
          .optionalGroup().as(LIST)
              .repeatedGroup()
                  .required(DOUBLE).named("latitude")
                  .required(DOUBLE).named("longitude")
              .named("element") // should not affect schema conversion
          .named("locations")
          .named("MultiFieldGroupInList"),
      new DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("locations", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(0.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(180.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.endField("element", 0);
          rc.endGroup();
          rc.endField("locations", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(
      record(new DoubleWritable(0.0), new DoubleWritable(0.0)),
      record(new DoubleWritable(0.0), new DoubleWritable(180.0)));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
}
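For contrast with the previous test, the expected value here is a list of two-field structs, since a repeated group with two fields cannot be a synthetic list wrapper. A rough sketch (using org.apache.hadoop.io classes directly rather than the test's record(...)/list(...) helpers, which may assemble things differently) of one such (latitude, longitude) element:

// Hypothetical sketch of one location element and the enclosing list as Writables.
import java.util.Arrays;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Writable;

public class LocationWritableSketch {
  public static void main(String[] args) {
    // the element struct holds latitude then longitude, matching field order
    ArrayWritable location = new ArrayWritable(Writable.class,
        new Writable[] { new DoubleWritable(0.0), new DoubleWritable(180.0) });
    // the column value is a list of such structs
    ArrayWritable locations = new ArrayWritable(Writable.class,
        new Writable[] { location });
    System.out.println(Arrays.toString(locations.get()));
  }
}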