Example 11 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestMapStructures, method testStringMapOfOptionalIntArray.

@Test
public void testStringMapOfOptionalIntArray() throws Exception {
    // tests a multimap structure for PARQUET-26
    Path test = writeDirect("StringMapOfOptionalIntArray", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().required(BINARY).as(UTF8).named("key").optionalGroup().as(LIST).repeatedGroup().optional(INT32).named("element").named("list").named("value").named("key_value").named("examples").named("StringMapOfOptionalIntArray"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("examples", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("low"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(34);
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(35);
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            // adds a null element
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("high"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(340);
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("element", 0);
            rc.addInteger(360);
            rc.endField("element", 0);
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("examples", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new Text("low"), record(new IntWritable(34), new IntWritable(35), null)), record(new Text("high"), record(new IntWritable(340), new IntWritable(360))));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("examples"), Arrays.asList("map<string,array<int>>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
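
For reference, the schema assembled by the Types builder in this test should correspond to the message-type text below. This is only an illustrative sketch: the MessageTypeParser call and the variable name are not part of the original test.

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Illustrative only: assumed message-type text for the schema built with Types above.
MessageType stringMapOfOptionalIntArray = MessageTypeParser.parseMessageType(
    "message StringMapOfOptionalIntArray {\n"
    + "  optional group examples (MAP) {\n"
    + "    repeated group key_value {\n"
    + "      required binary key (UTF8);\n"
    + "      optional group value (LIST) {\n"
    + "        repeated group list {\n"
    + "          optional int32 element;\n"
    + "        }\n"
    + "      }\n"
    + "    }\n"
    + "  }\n"
    + "}");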

Example 12 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestMapStructures, method testNestedMap.

@Test
public void testNestedMap() throws Exception {
    Path test = writeDirect("DoubleMapWithStructValue", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().optional(BINARY).as(UTF8).named("key").optionalGroup().as(MAP).repeatedGroup().optional(BINARY).as(UTF8).named("key").required(INT32).named("value").named("key_value").named("value").named("key_value").named("map_of_maps").named("NestedMap"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("map_of_maps", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("a"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("b"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(1);
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("b"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("a"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(-1);
            rc.endField("value", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("b"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(-2);
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("map_of_maps", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new Text("a"), record(record(new Text("b"), new IntWritable(1)))), record(new Text("b"), record(record(new Text("a"), new IntWritable(-1)), record(new Text("b"), new IntWritable(-2)))));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("map_of_maps"), Arrays.asList("map<string,map<string,int>>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
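
The nested-map schema assembled above should correspond to the message-type text below; as before, parsing it with MessageTypeParser is for readability only and is not part of the original test.

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Illustrative only: assumed message-type text for the NestedMap schema above.
MessageType nestedMap = MessageTypeParser.parseMessageType(
    "message NestedMap {\n"
    + "  optional group map_of_maps (MAP) {\n"
    + "    repeated group key_value {\n"
    + "      optional binary key (UTF8);\n"
    + "      optional group value (MAP) {\n"
    + "        repeated group key_value {\n"
    + "          optional binary key (UTF8);\n"
    + "          required int32 value;\n"
    + "        }\n"
    + "      }\n"
    + "    }\n"
    + "  }\n"
    + "}");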

Example 13 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestMapStructures, method testStringMapOfOptionalArray.

@Test
public void testStringMapOfOptionalArray() throws Exception {
    // tests a multimap structure
    Path test = writeDirect("StringMapOfOptionalArray", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().required(BINARY).as(UTF8).named("key").optionalGroup().as(LIST).repeatedGroup().optional(BINARY).as(UTF8).named("element").named("list").named("value").named("key_value").named("examples").named("StringMapOfOptionalArray"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("examples", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("green"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.addBinary(Binary.fromString("lettuce"));
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("element", 0);
            rc.addBinary(Binary.fromString("kale"));
            rc.endField("element", 0);
            rc.endGroup();
            rc.startGroup();
            // adds a null element
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("brown"));
            rc.endField("key", 0);
            // no values array
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("examples", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new Text("green"), record(new Text("lettuce"), new Text("kale"), null)), record(new Text("brown"), null));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("examples"), Arrays.asList("map<string,array<string>>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), Test (org.junit.Test)
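
This builder mirrors Example 11, but the list elements are optional UTF8 binaries instead of int32, and the "brown" key is written without any value group, which is why its value is null in the expected record. The assumed equivalent message-type text, parsed only for illustration:

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Illustrative only: assumed message-type text for the StringMapOfOptionalArray schema above.
MessageType stringMapOfOptionalArray = MessageTypeParser.parseMessageType(
    "message StringMapOfOptionalArray {\n"
    + "  optional group examples (MAP) {\n"
    + "    repeated group key_value {\n"
    + "      required binary key (UTF8);\n"
    + "      optional group value (LIST) {\n"
    + "        repeated group list {\n"
    + "          optional binary element (UTF8);\n"
    + "        }\n"
    + "      }\n"
    + "    }\n"
    + "  }\n"
    + "}");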

Example 14 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestArrayCompatibility, method testAmbiguousSingleFieldGroupInList.

@Test
public void testAmbiguousSingleFieldGroupInList() throws Exception {
    // this tests the case where older data has an ambiguous list structure and the
    // repeated group's name does not indicate that the source considered the group significant
    Path test = writeDirect("SingleFieldGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().required(INT64).named("count").named("single_element_group").named("single_element_groups").named("SingleFieldGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("single_element_groups", 0);
            rc.startGroup();
            // start writing array contents
            rc.startField("single_element_group", 0);
            rc.startGroup();
            rc.startField("count", 0);
            rc.addLong(1234L);
            rc.endField("count", 0);
            rc.endGroup();
            rc.startGroup();
            rc.startField("count", 0);
            rc.addLong(2345L);
            rc.endField("count", 0);
            rc.endGroup();
            // finished writing array contents
            rc.endField("single_element_group", 0);
            rc.endGroup();
            rc.endField("single_element_groups", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(new LongWritable(1234L), new LongWritable(2345L));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), LongWritable (org.apache.hadoop.io.LongWritable), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), Test (org.junit.Test)
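
The ambiguous layout exercised here should be equivalent to the schema below. The repeated group single_element_group has only one field, so, as the expected record shows, the reader collapses it and exposes the long values as direct list elements. The parse call is illustrative only and not part of the test.

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Illustrative only: assumed message-type text for the SingleFieldGroupInList schema above.
MessageType singleFieldGroupInList = MessageTypeParser.parseMessageType(
    "message SingleFieldGroupInList {\n"
    + "  optional group single_element_groups (LIST) {\n"
    + "    repeated group single_element_group {\n"
    + "      required int64 count;\n"
    + "    }\n"
    + "  }\n"
    + "}");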

Example 15 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestArrayCompatibility, method testMultiFieldGroupInList.

@Test
public void testMultiFieldGroupInList() throws Exception {
    // tests the missing element layer, detected by a multi-field group
    Path test = writeDirect("MultiFieldGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().required(DOUBLE).named("latitude").required(DOUBLE).named("longitude").named(// should not affect schema conversion
    "element").named("locations").named("MultiFieldGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("locations", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(0.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(180.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            rc.endGroup();
            rc.endField("locations", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new DoubleWritable(0.0), new DoubleWritable(0.0)), record(new DoubleWritable(0.0), new DoubleWritable(180.0)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), Test (org.junit.Test)
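
Here the repeated group carries two fields (latitude, longitude), so the group itself is treated as the list element rather than a synthetic layer, and the expected record is a list of two-field structs. The assumed equivalent schema, parsed only for illustration:

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Illustrative only: assumed message-type text for the MultiFieldGroupInList schema above.
MessageType multiFieldGroupInList = MessageTypeParser.parseMessageType(
    "message MultiFieldGroupInList {\n"
    + "  optional group locations (LIST) {\n"
    + "    repeated group element {\n"
    + "      required double latitude;\n"
    + "      required double longitude;\n"
    + "    }\n"
    + "  }\n"
    + "}");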

Aggregations

Path (org.apache.hadoop.fs.Path) 19
RecordConsumer (org.apache.parquet.io.api.RecordConsumer) 19
Test (org.junit.Test) 19
ArrayWritable (org.apache.hadoop.io.ArrayWritable) 18
IntWritable (org.apache.hadoop.io.IntWritable) 9
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 6
Text (org.apache.hadoop.io.Text) 5
LongWritable (org.apache.hadoop.io.LongWritable) 3
MessageType (org.apache.parquet.schema.MessageType) 2
ParquetRecordReaderWrapper (org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper) 1
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 1
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) 1
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 1
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) 1
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF) 1
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) 1
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 1
FloatWritable (org.apache.hadoop.io.FloatWritable) 1
FileSplit (org.apache.hadoop.mapred.FileSplit) 1