
Example 41 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestMapStructures, method testStringMapOfOptionalIntArray.

@Test
public void testStringMapOfOptionalIntArray() throws Exception {
    // tests a multimap structure for PARQUET-26
    Path test = writeDirect("StringMapOfOptionalIntArray", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().required(BINARY).as(UTF8).named("key").optionalGroup().as(LIST).repeatedGroup().optional(INT32).named("element").named("list").named("value").named("key_value").named("examples").named("StringMapOfOptionalIntArray"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("examples", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
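            // first entry: key "low" -> [34, 35, null]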
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("low"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(34);
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(35);
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            // adds a null element
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
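            // second entry: key "high" -> [340, 360]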
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("high"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(340);
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(360);
            rc.endField("element", 0);
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("examples", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new Text("low"), record(new IntWritable(34), new IntWritable(35), null)), record(new Text("high"), record(new IntWritable(340), new IntWritable(360))));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("examples"), Arrays.asList("map<string,array<int>>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
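
The record(...) and list(...) helpers used to build the expected value come from the test's base class and are not shown on this page. Below is a minimal sketch of what they plausibly do, assuming each helper simply wraps its arguments in a positional ArrayWritable; the real Hive helpers may differ in wrapping details, so treat this as illustrative rather than the actual implementation.

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

// Sketch only: a struct, a map entry, and an array element sequence are all
// represented as a positional Writable[] wrapped in an ArrayWritable, with
// nulls marking missing values.
public static ArrayWritable record(Writable... fields) {
    return new ArrayWritable(Writable.class, fields);
}

public static ArrayWritable list(Writable... elements) {
    return new ArrayWritable(Writable.class, elements);
}

This positional nesting is why the expected values read as record(...)/list(...) trees: Hive's Parquet read path materializes structs, maps, and arrays alike as nested ArrayWritable instances.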

Example 42 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestMapStructures, method testNestedMap.

@Test
public void testNestedMap() throws Exception {
    Path test = writeDirect("DoubleMapWithStructValue", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().optional(BINARY).as(UTF8).named("key").optionalGroup().as(MAP).repeatedGroup().optional(BINARY).as(UTF8).named("key").required(INT32).named("value").named("key_value").named("value").named("key_value").named("map_of_maps").named("NestedMap"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("map_of_maps", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
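            // first outer entry: key "a" -> inner map {"b": 1}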
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("a"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("b"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(1);
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
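            // second outer entry: key "b" -> inner map {"a": -1, "b": -2}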
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("b"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("a"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(-1);
            rc.endField("value", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("b"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(-2);
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("map_of_maps", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new Text("a"), record(record(new Text("b"), new IntWritable(1)))), record(new Text("b"), record(record(new Text("a"), new IntWritable(-1)), record(new Text("b"), new IntWritable(-2)))));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("map_of_maps"), Arrays.asList("map<string,map<string,int>>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
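
When an assertion over such deeply nested values fails, the default toString() output is hard to read. The following throwaway utility is not part of the Hive test (the name dump is mine); it relies only on ArrayWritable.get(), which returns the wrapped Writable[], and renders the tree recursively for debugging.

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

// Renders a Writable tree, e.g. [[a, [[b, 1]]], [b, [[a, -1], [b, -2]]]].
static String dump(Writable w) {
    if (w == null) {
        return "null";
    }
    if (w instanceof ArrayWritable) {
        StringBuilder sb = new StringBuilder("[");
        Writable[] values = ((ArrayWritable) w).get();
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                sb.append(", ");
            }
            sb.append(dump(values[i]));
        }
        return sb.append("]").toString();
    }
    // primitives such as Text, IntWritable, LongWritable
    return w.toString();
}

Printing dump(expected) next to dump(records.get(0)) makes structural mismatches, such as an extra wrapping level or a null in the wrong slot, immediately visible.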

Example 43 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestMapStructures, method testStringMapOfOptionalArray.

@Test
public void testStringMapOfOptionalArray() throws Exception {
    // tests a multimap structure
    Path test = writeDirect("StringMapOfOptionalArray", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().required(BINARY).as(UTF8).named("key").optionalGroup().as(LIST).repeatedGroup().optional(BINARY).as(UTF8).named("element").named("list").named("value").named("key_value").named("examples").named("StringMapOfOptionalArray"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("examples", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
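            // first entry: key "green" -> ["lettuce", "kale", null]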
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("green"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.addBinary(Binary.fromString("lettuce"));
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("element", 0);
            rc.addBinary(Binary.fromString("kale"));
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            // adds a null element
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
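            // second entry: key "brown" with no value (reads back as null)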
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("brown"));
            rc.endField("key", 0);
            // no values array
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("examples", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new Text("green"), record(new Text("lettuce"), new Text("kale"), null)), record(new Text("brown"), null));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("examples"), Arrays.asList("map<string,array<string>>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), Test (org.junit.Test)
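
The type strings handed to deserialize(...) are ordinary Hive type names. If you need the parsed form, Hive's standard TypeInfoUtils API turns such a string into a TypeInfo tree, as in this short sketch:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public static void main(String[] args) {
    // Parses the type string used by this test; the result is a MapTypeInfo
    // with a string key type and a list<string> value type.
    TypeInfo typeInfo =
        TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<string>>");
    System.out.println(typeInfo.getTypeName());  // map<string,array<string>>
}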

Example 44 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestParquetSerDe, method testParquetHiveSerDeComplexTypes.

public void testParquetHiveSerDeComplexTypes() throws Throwable {
    // Initialize
    ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    Configuration conf = new Configuration();
    Properties tblProperties = new Properties();
    tblProperties.setProperty(serdeConstants.LIST_COLUMNS, "a,s");
    tblProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,struct<a:int,b:string>");
    conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "s.b");
    serDe.initialize(conf, tblProperties);
    // Generate test data
    Writable[] wb = new Writable[1];
    wb[0] = new BytesWritable("foo".getBytes("UTF-8"));
    Writable[] ws = new Writable[2];
    ws[0] = null;
    ArrayWritable awb = new ArrayWritable(Writable.class, wb);
    ws[1] = awb;
    ArrayWritable aws = new ArrayWritable(Writable.class, ws);
    // Inspect the test data
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
    StructField s = soi.getStructFieldRef("s");
    assertEquals(awb, soi.getStructFieldData(aws, s));
    StructObjectInspector boi = (StructObjectInspector) s.getFieldObjectInspector();
    StructField b = boi.getStructFieldRef("b");
    assertEquals(wb[0], boi.getStructFieldData(awb, b));
}
Also used: StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), ParquetHiveSerDe (org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe), Configuration (org.apache.hadoop.conf.Configuration), ArrayWritable (org.apache.hadoop.io.ArrayWritable), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), Writable (org.apache.hadoop.io.Writable), LongWritable (org.apache.hadoop.io.LongWritable), BytesWritable (org.apache.hadoop.io.BytesWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), IntWritable (org.apache.hadoop.io.IntWritable), Properties (java.util.Properties), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
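
Note that the string field b comes back as a BytesWritable rather than a Text. A common follow-up step is recovering the Java string, and BytesWritable has a well-known gotcha: getBytes() may return a padded backing array, so it must be sliced with getLength(). A brief aside, not part of the test:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.BytesWritable;

public static void main(String[] args) {
    BytesWritable bytes = new BytesWritable("foo".getBytes(StandardCharsets.UTF_8));
    // Slice the buffer to getLength(); the backing array can be larger.
    String s = new String(bytes.getBytes(), 0, bytes.getLength(), StandardCharsets.UTF_8);
    System.out.println(s);  // foo
}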

Example 45 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestArrayCompatibility, method testAmbiguousSingleFieldGroupInList.

@Test
public void testAmbiguousSingleFieldGroupInList() throws Exception {
    // tests the case where older data has an ambiguous list and the repeated
    // group is not named in a way that indicates the source considered the
    // group significant
    Path test = writeDirect("SingleFieldGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().required(INT64).named("count").named("single_element_group").named("single_element_groups").named("SingleFieldGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("single_element_groups", 0);
            rc.startGroup();
            // start writing array contents
            rc.startField("single_element_group", 0);
            rc.startGroup();
            rc.startField("count", 0);
            rc.addLong(1234L);
            rc.endField("count", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("count", 0);
            rc.addLong(2345L);
            rc.endField("count", 0);
            rc.endGroup();
            // finished writing array contents
            rc.endField("single_element_group", 0);
            rc.endGroup();
            rc.endField("single_element_groups", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(new LongWritable(1234L), new LongWritable(2345L));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), LongWritable (org.apache.hadoop.io.LongWritable), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), Test (org.junit.Test)
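
For reference, the builder chain above produces the schema below, shown in Parquet's schema DSL via the standard MessageTypeParser API. Written out this way, the ambiguity is easier to see: the repeated group could be read either as a list of single-field structs or, as the expected value in this test assumes, as a plain list of int64 values.

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message SingleFieldGroupInList {\n" +
        "  optional group single_element_groups (LIST) {\n" +
        "    repeated group single_element_group {\n" +
        "      required int64 count;\n" +
        "    }\n" +
        "  }\n" +
        "}");
    System.out.println(schema);
}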

Aggregations

ArrayWritable (org.apache.hadoop.io.ArrayWritable): 72
Test (org.junit.Test): 41
IntWritable (org.apache.hadoop.io.IntWritable): 31
Writable (org.apache.hadoop.io.Writable): 29
Path (org.apache.hadoop.fs.Path): 18
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 18
LongWritable (org.apache.hadoop.io.LongWritable): 18
RecordConsumer (org.apache.parquet.io.api.RecordConsumer): 18
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 15
ArrayList (java.util.ArrayList): 13
BytesWritable (org.apache.hadoop.io.BytesWritable): 10
List (java.util.List): 9
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 8
FloatWritable (org.apache.hadoop.io.FloatWritable): 8
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 6
NullWritable (org.apache.hadoop.io.NullWritable): 6
Text (org.apache.hadoop.io.Text): 6
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 5
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 5
MapWritable (org.apache.hadoop.io.MapWritable): 5