Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.
From the class TestMapStructures, the method testStringMapOfOptionalIntArray.
@Test
public void testStringMapOfOptionalIntArray() throws Exception {
  // tests a multimap structure for PARQUET-26
  Path test = writeDirect("StringMapOfOptionalIntArray",
      Types.buildMessage()
          .optionalGroup().as(MAP)
              .repeatedGroup()
                  .required(BINARY).as(UTF8).named("key")
                  .optionalGroup().as(LIST)
                      .repeatedGroup()
                          .optional(INT32).named("element")
                      .named("list")
                  .named("value")
              .named("key_value")
          .named("examples")
          .named("StringMapOfOptionalIntArray"),
      new TestArrayCompatibility.DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("examples", 0);
          rc.startGroup();
          rc.startField("key_value", 0);

          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("low"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("list", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(34);
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(35);
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          // adds a null element
          rc.endGroup();
          rc.endField("list", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();

          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("high"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("list", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(340);
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("element", 0);
          rc.addInteger(360);
          rc.endField("element", 0);
          rc.endGroup();
          rc.endField("list", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();

          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("examples", 0);
          rc.endMessage();
        }
      });

  ArrayWritable expected = list(
      record(new Text("low"),
          record(new IntWritable(34), new IntWritable(35), null)),
      record(new Text("high"),
          record(new IntWritable(340), new IntWritable(360))));

  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));

  deserialize(records.get(0),
      Arrays.asList("examples"),
      Arrays.asList("map<string,array<int>>"));
}
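The expected value above is assembled with the test suite's record and list helpers. A minimal sketch of what those helpers plausibly look like (the exact definitions live in Hive's test support code; the wrapping shown here is an assumption inferred from how the expected values line up with the serde's output):

public static ArrayWritable record(Writable... fields) {
  // a Hive struct is represented as a positional array of field values;
  // a null slot stands for a null field
  return new ArrayWritable(Writable.class, fields);
}

public static ArrayWritable list(Writable... elements) {
  // assumed: lists and maps are wrapped in one extra single-field struct
  // level, matching the shape the Parquet reader hands to the inspectors
  return record(new ArrayWritable(Writable.class, elements));
}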
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.
From the class TestMapStructures, the method testNestedMap.
@Test
public void testNestedMap() throws Exception {
  Path test = writeDirect("DoubleMapWithStructValue",
      Types.buildMessage()
          .optionalGroup().as(MAP)
              .repeatedGroup()
                  .optional(BINARY).as(UTF8).named("key")
                  .optionalGroup().as(MAP)
                      .repeatedGroup()
                          .optional(BINARY).as(UTF8).named("key")
                          .required(INT32).named("value")
                      .named("key_value")
                  .named("value")
              .named("key_value")
          .named("map_of_maps")
          .named("NestedMap"),
      new TestArrayCompatibility.DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("map_of_maps", 0);
          rc.startGroup();
          rc.startField("key_value", 0);

          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("a"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("key_value", 0);
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("b"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.addInteger(1);
          rc.endField("value", 1);
          rc.endGroup();
          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();

          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("b"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("key_value", 0);
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("a"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.addInteger(-1);
          rc.endField("value", 1);
          rc.endGroup();
          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("b"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.addInteger(-2);
          rc.endField("value", 1);
          rc.endGroup();
          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();

          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("map_of_maps", 0);
          rc.endMessage();
        }
      });

  ArrayWritable expected = list(
      record(new Text("a"),
          record(record(new Text("b"), new IntWritable(1)))),
      record(new Text("b"),
          record(record(new Text("a"), new IntWritable(-1)),
              record(new Text("b"), new IntWritable(-2)))));

  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));

  deserialize(records.get(0),
      Arrays.asList("map_of_maps"),
      Arrays.asList("map<string,map<string,int>>"));
}
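To make the nesting concrete, here is a hypothetical walk through the record returned by this test, assuming the list/record wrapping sketched after the previous test (every variable name below is illustrative, not part of the source):

// the single row: a struct with one field, map_of_maps
ArrayWritable row = records.get(0);
// the map itself: an array of key_value entry structs
ArrayWritable entries = (ArrayWritable) row.get()[0];
// first entry: key "a" paired with its nested-map value
ArrayWritable first = (ArrayWritable) entries.get()[0];
Text key = (Text) first.get()[0];                        // "a"
ArrayWritable innerMap = (ArrayWritable) first.get()[1]; // wraps the entry ("b", 1)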
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.
From the class TestMapStructures, the method testStringMapOfOptionalArray.
@Test
public void testStringMapOfOptionalArray() throws Exception {
  // tests a multimap structure
  Path test = writeDirect("StringMapOfOptionalArray",
      Types.buildMessage()
          .optionalGroup().as(MAP)
              .repeatedGroup()
                  .required(BINARY).as(UTF8).named("key")
                  .optionalGroup().as(LIST)
                      .repeatedGroup()
                          .optional(BINARY).as(UTF8).named("element")
                      .named("list")
                  .named("value")
              .named("key_value")
          .named("examples")
          .named("StringMapOfOptionalArray"),
      new TestArrayCompatibility.DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("examples", 0);
          rc.startGroup();
          rc.startField("key_value", 0);

          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("green"));
          rc.endField("key", 0);
          rc.startField("value", 1);
          rc.startGroup();
          rc.startField("list", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.addBinary(Binary.fromString("lettuce"));
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("element", 0);
          rc.addBinary(Binary.fromString("kale"));
          rc.endField("element", 0);
          rc.endGroup();
          rc.startGroup();
          // adds a null element
          rc.endGroup();
          rc.endField("list", 0);
          rc.endGroup();
          rc.endField("value", 1);
          rc.endGroup();

          rc.startGroup();
          rc.startField("key", 0);
          rc.addBinary(Binary.fromString("brown"));
          rc.endField("key", 0);
          // no values array
          rc.endGroup();

          rc.endField("key_value", 0);
          rc.endGroup();
          rc.endField("examples", 0);
          rc.endMessage();
        }
      });

  ArrayWritable expected = list(
      record(new Text("green"),
          record(new Text("lettuce"), new Text("kale"), null)),
      record(new Text("brown"), null));

  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));

  deserialize(records.get(0),
      Arrays.asList("examples"),
      Arrays.asList("map<string,array<string>>"));
}
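All of these tests funnel through the writeDirect helper, which pairs a Parquet MessageType with a callback that emits low-level RecordConsumer events. A rough sketch of that plumbing, assuming it is built on parquet-mr's WriteSupport and ParquetWriter (the real helper in Hive's test base class may differ in details such as temp-file handling; tempDir below is an assumed JUnit TemporaryFolder):

// assumes: org.apache.parquet.hadoop.ParquetWriter,
//          org.apache.parquet.hadoop.api.WriteSupport,
//          org.junit.rules.TemporaryFolder
public interface DirectWriter {
  void write(RecordConsumer consumer);
}

static class DirectWriteSupport extends WriteSupport<Void> {
  private final MessageType type;
  private final DirectWriter writer;
  private RecordConsumer recordConsumer;

  DirectWriteSupport(MessageType type, DirectWriter writer) {
    this.type = type;
    this.writer = writer;
  }

  @Override
  public WriteContext init(Configuration conf) {
    return new WriteContext(type, new HashMap<String, String>());
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    this.recordConsumer = recordConsumer;
  }

  @Override
  public void write(Void record) {
    // delegate the actual event stream to the test's callback
    writer.write(recordConsumer);
  }
}

Path writeDirect(String name, MessageType type, DirectWriter writer) throws IOException {
  File temp = tempDir.newFile(name + ".parquet");
  temp.delete(); // ParquetWriter refuses to overwrite an existing file
  Path path = new Path(temp.getPath());
  ParquetWriter<Void> parquetWriter =
      new ParquetWriter<Void>(path, new DirectWriteSupport(type, writer));
  parquetWriter.write(null); // triggers exactly one call to the callback
  parquetWriter.close();
  return path;
}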
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.
From the class TestParquetSerDe, the method testParquetHiveSerDeComplexTypes.
public void testParquetHiveSerDeComplexTypes() throws Throwable {
  // Initialize
  ParquetHiveSerDe serDe = new ParquetHiveSerDe();
  Configuration conf = new Configuration();
  Properties tblProperties = new Properties();

  tblProperties.setProperty(serdeConstants.LIST_COLUMNS, "a,s");
  tblProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,struct<a:int,b:string>");
  conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "s.b");

  serDe.initialize(conf, tblProperties);

  // Generate test data
  Writable[] wb = new Writable[1];
  wb[0] = new BytesWritable("foo".getBytes("UTF-8"));
  Writable[] ws = new Writable[2];
  ws[0] = null;
  ArrayWritable awb = new ArrayWritable(Writable.class, wb);
  ws[1] = awb;
  ArrayWritable aws = new ArrayWritable(Writable.class, ws);

  // Inspect the test data
  StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
  StructField s = soi.getStructFieldRef("s");
  assertEquals(awb, soi.getStructFieldData(aws, s));

  StructObjectInspector boi = (StructObjectInspector) s.getFieldObjectInspector();
  StructField b = boi.getStructFieldRef("b");
  assertEquals(wb[0], boi.getStructFieldData(awb, b));
}
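The takeaway is that ArrayWritable rows are purely positional: the row aws has slot 0 for column a and slot 1 for the struct s, and because only s.b is projected through READ_NESTED_COLUMN_PATH_CONF_STR, the struct is materialized with a single slot that the object inspector maps to field b. An illustrative fragment, not part of the test:

ArrayWritable row = aws;                         // slots: [a, s]
Writable a = row.get()[0];                       // null: a was never populated
ArrayWritable s = (ArrayWritable) row.get()[1];  // the struct value for s
Writable b = s.get()[0];                         // "b" lands in slot 0 after nested pruning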
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.
From the class TestArrayCompatibility, the method testAmbiguousSingleFieldGroupInList.
@Test
public void testAmbiguousSingleFieldGroupInList() throws Exception {
  // tests the case where older data has an ambiguous list and the repeated
  // group is not named in a way that indicates the source considered the
  // group itself significant
  Path test = writeDirect("SingleFieldGroupInList",
      Types.buildMessage()
          .optionalGroup().as(LIST)
              .repeatedGroup()
                  .required(INT64).named("count")
              .named("single_element_group")
          .named("single_element_groups")
          .named("SingleFieldGroupInList"),
      new DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("single_element_groups", 0);
          rc.startGroup();

          // start writing array contents
          rc.startField("single_element_group", 0);
          rc.startGroup();
          rc.startField("count", 0);
          rc.addLong(1234L);
          rc.endField("count", 0);
          rc.endGroup();
          rc.startGroup();
          rc.startField("count", 0);
          rc.addLong(2345L);
          rc.endField("count", 0);
          rc.endGroup();
          rc.endField("single_element_group", 0);
          // finished writing array contents

          rc.endGroup();
          rc.endField("single_element_groups", 0);
          rc.endMessage();
        }
      });

  ArrayWritable expected = list(
      new LongWritable(1234L),
      new LongWritable(2345L));

  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
}
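The ambiguity being exercised: a repeated group with exactly one field can be read either as a list whose element is that field, or as a list of single-field structs. Parquet's backward-compatibility rules rely on naming hints (a repeated group named array, or carrying a _tuple suffix, signals that the group itself is the element); since single_element_group carries no such hint, the reader treats the group as a synthetic list layer, so the expected value is a plain list of longs. For comparison, the modern three-level LIST encoding expresses each reading unambiguously (an illustrative sketch, not from the source):

// the reading chosen by this test: array<bigint>
MessageType listOfLongs = Types.buildMessage()
    .optionalGroup().as(LIST)
        .repeatedGroup()
            .required(INT64).named("element")
        .named("list")
    .named("single_element_groups")
    .named("ListOfLongs");

// the rejected reading: array<struct<count:bigint>>
MessageType listOfStructs = Types.buildMessage()
    .optionalGroup().as(LIST)
        .repeatedGroup()
            .requiredGroup()
                .required(INT64).named("count")
            .named("element")
        .named("list")
    .named("single_element_groups")
    .named("ListOfStructs");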