
Example 16 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestArrayCompatibility, method testHiveRequiredGroupInList.

@Test
public void testHiveRequiredGroupInList() throws Exception {
    // this matches the list structure that Hive writes
    Path test = writeDirect("HiveRequiredGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().requiredGroup().required(DOUBLE).named("latitude").required(DOUBLE).named("longitude").named("element").named("bag").named("locations").named("HiveRequiredGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("locations", 0);
            rc.startGroup();
            // start writing array contents
            rc.startField("bag", 0);
            // write a non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(180.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // write a second non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(0.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // finished writing array contents
            rc.endField("bag", 0);
            rc.endGroup();
            rc.endField("locations", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(
        record(new DoubleWritable(0.0), new DoubleWritable(180.0)),
        record(new DoubleWritable(0.0), new DoubleWritable(0.0)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), Test (org.junit.Test)
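
The writeDirect and DirectWriter helpers these tests rely on are not shown on this page. As a rough orientation, a callback like DirectWriter can be wired into Parquet through a custom WriteSupport; the following is a minimal sketch under that assumption (the class and its wiring are illustrative, not the actual Hive test harness):

import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.MessageType;

// Assumed shape of the single-method callback the tests implement:
interface DirectWriter {
    void write(RecordConsumer rc);
}

// Illustrative adapter: lets a DirectWriter-style callback emit one record
// through the RecordConsumer that Parquet hands to a WriteSupport.
class DirectWriteSupport extends WriteSupport<Void> {
    private final MessageType schema;
    private final DirectWriter writer;
    private RecordConsumer recordConsumer;

    DirectWriteSupport(MessageType schema, DirectWriter writer) {
        this.schema = schema;
        this.writer = writer;
    }

    @Override
    public WriteContext init(Configuration conf) {
        // Declare the file schema; no extra key/value metadata.
        return new WriteContext(schema, new HashMap<String, String>());
    }

    @Override
    public void prepareForWrite(RecordConsumer recordConsumer) {
        this.recordConsumer = recordConsumer;
    }

    @Override
    public void write(Void record) {
        // Replay the recorded start/end calls for a single record.
        writer.write(recordConsumer);
    }
}

Parquet supplies the RecordConsumer via prepareForWrite; each subsequent write call then emits exactly one record's start/end sequence.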

Example 17 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestArrayCompatibility, method testNewRequiredGroupInList.

@Test
public void testNewRequiredGroupInList() throws Exception {
    Path test = writeDirect("NewRequiredGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().requiredGroup().required(DOUBLE).named("latitude").required(DOUBLE).named("longitude").named("element").named("list").named("locations").named("NewRequiredGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("locations", 0);
            rc.startGroup();
            // start writing array contents
            rc.startField("list", 0);
            // write a non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(180.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // write a second non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(0.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // finished writing array contents
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("locations", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(
        record(new DoubleWritable(0.0), new DoubleWritable(180.0)),
        record(new DoubleWritable(0.0), new DoubleWritable(0.0)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), Test (org.junit.Test)
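
Examples 16 and 17 emit an identical call sequence per element; only the name of the repeated level differs ("bag" in the Hive-written layout, "list" in the standard one). A hypothetical helper, not part of the original tests, can factor out the shared per-element calls:

    // Writes one non-null (latitude, longitude) element of the LIST structure.
    private static void writeLocation(RecordConsumer rc, double latitude, double longitude) {
        // array level
        rc.startGroup();
        rc.startField("element", 0);
        rc.startGroup();
        rc.startField("latitude", 0);
        rc.addDouble(latitude);
        rc.endField("latitude", 0);
        rc.startField("longitude", 1);
        rc.addDouble(longitude);
        rc.endField("longitude", 1);
        rc.endGroup();
        rc.endField("element", 0);
        // array level
        rc.endGroup();
    }

With this helper, either writer body collapses to two calls between the startField/endField pair for "bag" or "list": writeLocation(rc, 0.0, 180.0); writeLocation(rc, 0.0, 0.0);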

Example 18 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestMapStructures, method testMapWithComplexKey.

@Test
public void testMapWithComplexKey() throws Exception {
    Path test = writeDirect("MapWithComplexKey", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().requiredGroup().required(INT32).named("x").required(INT32).named("y").named("key").optional(DOUBLE).named("value").named("key_value").named("matrix").named("MapWithComplexKey"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("matrix", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.startGroup();
            rc.startField("x", 0);
            rc.addInteger(7);
            rc.endField("x", 0);
            rc.startField("y", 1);
            rc.addInteger(22);
            rc.endField("y", 1);
            rc.endGroup();
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addDouble(3.14);
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("matrix", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(record(new IntWritable(7), new IntWritable(22)), new DoubleWritable(3.14)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("matrix"), Arrays.asList("map<struct<x:int,y:int>,bigint>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
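
In this schema the map value is declared optional while the key is required, so a null value is encoded by omitting the "value" field of an entry altogether rather than by writing a placeholder. A sketch of a single-entry writer under that reading (writeMatrixEntry is illustrative, not from the test):

    // Writes one repeated key_value group of the "matrix" map; value may be null.
    private static void writeMatrixEntry(RecordConsumer rc, int x, int y, Double value) {
        rc.startGroup();
        rc.startField("key", 0);
        rc.startGroup();
        rc.startField("x", 0);
        rc.addInteger(x);
        rc.endField("x", 0);
        rc.startField("y", 1);
        rc.addInteger(y);
        rc.endField("y", 1);
        rc.endGroup();
        rc.endField("key", 0);
        if (value != null) {
            // optional field: for a null value the field is simply never started
            rc.startField("value", 1);
            rc.addDouble(value);
            rc.endField("value", 1);
        }
        rc.endGroup();
    }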

Example 19 with RecordConsumer

Use of org.apache.parquet.io.api.RecordConsumer in project hive by apache.

From the class TestMapStructures, method testStringMapRequiredPrimitive.

@Test
public void testStringMapRequiredPrimitive() throws Exception {
    Path test = writeDirect("StringMapRequiredPrimitive", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().required(BINARY).as(UTF8).named("key").required(INT32).named("value").named("key_value").named("votes").named("StringMapRequiredPrimitive"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("votes", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("lettuce"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(34);
            rc.endField("value", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("cabbage"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(18);
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("votes", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new Text("lettuce"), new IntWritable(34)), record(new Text("cabbage"), new IntWritable(18)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("votes"), Arrays.asList("map<string,int>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
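
The two hard-coded entries above generalize directly: every map entry is one repeated key_value group, and all entries of a record sit between a single startField/endField pair. A sketch of the same call sequence driven by a java.util.Map (writeVotes is illustrative, not part of the test; requires org.apache.parquet.io.api.Binary and org.apache.parquet.io.api.RecordConsumer):

    // Replays this test's call sequence for an arbitrary string -> int map.
    static void writeVotes(RecordConsumer rc, java.util.Map<String, Integer> votes) {
        rc.startMessage();
        rc.startField("votes", 0);
        rc.startGroup();
        if (!votes.isEmpty()) {
            // a repeated field with zero entries is never started at all
            rc.startField("key_value", 0);
            for (java.util.Map.Entry<String, Integer> e : votes.entrySet()) {
                rc.startGroup(); // one map entry
                rc.startField("key", 0);
                rc.addBinary(Binary.fromString(e.getKey()));
                rc.endField("key", 0);
                rc.startField("value", 1);
                rc.addInteger(e.getValue());
                rc.endField("value", 1);
                rc.endGroup();
            }
            rc.endField("key_value", 0);
        }
        rc.endGroup();
        rc.endField("votes", 0);
        rc.endMessage();
    }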

Aggregations

Path (org.apache.hadoop.fs.Path): 19
RecordConsumer (org.apache.parquet.io.api.RecordConsumer): 19
Test (org.junit.Test): 19
ArrayWritable (org.apache.hadoop.io.ArrayWritable): 18
IntWritable (org.apache.hadoop.io.IntWritable): 9
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 6
Text (org.apache.hadoop.io.Text): 5
LongWritable (org.apache.hadoop.io.LongWritable): 3
MessageType (org.apache.parquet.schema.MessageType): 2
ParquetRecordReaderWrapper (org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper): 1
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 1
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 1
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 1
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 1
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 1
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 1
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1
FloatWritable (org.apache.hadoop.io.FloatWritable): 1
FileSplit (org.apache.hadoop.mapred.FileSplit): 1