Search in sources :

Example 11 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

the class TestArrayCompatibility method testUnannotatedListOfGroups.

/**
 * Writes a single message whose {@code list_of_points} field is a repeated
 * group of two required floats ({@code x}, {@code y}) carrying no LIST
 * annotation, then verifies the file reads back as exactly one record that
 * matches a list of two (x, y) records.
 */
@Test
public void testUnannotatedListOfGroups() throws Exception {
    DirectWriter pointWriter = new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            // Every point is written with the same value for x and y.
            final float[] coordinates = { 1.0f, 2.0f };
            rc.startMessage();
            rc.startField("list_of_points", 0);
            for (float coordinate : coordinates) {
                rc.startGroup();
                rc.startField("x", 0);
                rc.addFloat(coordinate);
                rc.endField("x", 0);
                rc.startField("y", 1);
                rc.addFloat(coordinate);
                rc.endField("y", 1);
                rc.endGroup();
            }
            rc.endField("list_of_points", 0);
            rc.endMessage();
        }
    };
    Path file = writeDirect(
        "UnannotatedListOfGroups",
        Types.buildMessage()
            .repeatedGroup()
                .required(FLOAT).named("x")
                .required(FLOAT).named("y")
                .named("list_of_points")
            .named("UnannotatedListOfGroups"),
        pointWriter);

    ArrayWritable expected = list(
        record(new FloatWritable(1.0f), new FloatWritable(1.0f)),
        record(new FloatWritable(2.0f), new FloatWritable(2.0f)));

    List<ArrayWritable> rows = read(file);
    Assert.assertEquals("Should have only one record", 1, rows.size());
    assertEquals("Should match expected record", expected, rows.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) FloatWritable(org.apache.hadoop.io.FloatWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) Test(org.junit.Test)

Example 12 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

the class TestArrayCompatibility method testAvroPrimitiveInList.

/**
 * Writes a single message whose required, LIST-annotated group
 * {@code list_of_ints} holds a repeated INT32 field named {@code array},
 * then verifies the file reads back as exactly one record that matches a
 * list of the three written integers.
 */
@Test
public void testAvroPrimitiveInList() throws Exception {
    DirectWriter intListWriter = new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            final int[] elements = { 34, 35, 36 };
            rc.startMessage();
            rc.startField("list_of_ints", 0);
            rc.startGroup();
            // The repeated primitive values sit directly under "array".
            rc.startField("array", 0);
            for (int element : elements) {
                rc.addInteger(element);
            }
            rc.endField("array", 0);
            rc.endGroup();
            rc.endField("list_of_ints", 0);
            rc.endMessage();
        }
    };
    Path file = writeDirect(
        "AvroPrimitiveInList",
        Types.buildMessage()
            .requiredGroup().as(LIST)
                .repeated(INT32).named("array")
                .named("list_of_ints")
            .named("AvroPrimitiveInList"),
        intListWriter);

    ArrayWritable expected = list(
        new IntWritable(34), new IntWritable(35), new IntWritable(36));

    List<ArrayWritable> rows = read(file);
    Assert.assertEquals("Should have only one record", 1, rows.size());
    assertEquals("Should match expected record", expected, rows.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 13 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

the class TestArrayCompatibility method testUnannotatedListOfPrimitives.

/**
 * Writes a single message whose {@code list_of_ints} field is a bare
 * repeated INT32 (no enclosing group, no LIST annotation), then verifies the
 * file reads back as exactly one record that matches a list of the three
 * written integers.
 */
@Test
public void testUnannotatedListOfPrimitives() throws Exception {
    MessageType schema = Types.buildMessage()
        .repeated(INT32).named("list_of_ints")
        .named("UnannotatedListOfPrimitives");

    DirectWriter intWriter = new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            final int[] elements = { 34, 35, 36 };
            rc.startMessage();
            // The repeated values are written straight into the top-level field.
            rc.startField("list_of_ints", 0);
            for (int element : elements) {
                rc.addInteger(element);
            }
            rc.endField("list_of_ints", 0);
            rc.endMessage();
        }
    };
    Path file = writeDirect("UnannotatedListOfPrimitives", schema, intWriter);

    ArrayWritable expected = list(
        new IntWritable(34), new IntWritable(35), new IntWritable(36));

    List<ArrayWritable> rows = read(file);
    Assert.assertEquals("Should have only one record", 1, rows.size());
    assertEquals("Should match expected record", expected, rows.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) MessageType(org.apache.parquet.schema.MessageType) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 14 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

the class TestArrayCompatibility method testAvroSingleFieldGroupInList.

/**
 * Covers older data whose list structure is ambiguous: the repeated group
 * has a single field, and the intended interpretation is signalled by the
 * repeated group's name, "array". Verifies the file reads back as exactly
 * one record that matches a list of two single-field records.
 */
@Test
public void testAvroSingleFieldGroupInList() throws Exception {
    DirectWriter groupWriter = new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            final long[] counts = { 1234L, 2345L };
            rc.startMessage();
            rc.startField("single_element_groups", 0);
            rc.startGroup();
            // array contents begin: one group per count value
            rc.startField("array", 0);
            for (long count : counts) {
                rc.startGroup();
                rc.startField("count", 0);
                rc.addLong(count);
                rc.endField("count", 0);
                rc.endGroup();
            }
            // array contents end
            rc.endField("array", 0);
            rc.endGroup();
            rc.endField("single_element_groups", 0);
            rc.endMessage();
        }
    };
    Path file = writeDirect(
        "AvroSingleFieldGroupInList",
        Types.buildMessage()
            .optionalGroup().as(LIST)
                .repeatedGroup()
                    .required(INT64).named("count")
                    .named("array")
                .named("single_element_groups")
            .named("AvroSingleFieldGroupInList"),
        groupWriter);

    ArrayWritable expected = list(
        record(new LongWritable(1234L)),
        record(new LongWritable(2345L)));

    List<ArrayWritable> rows = read(file);
    Assert.assertEquals("Should have only one record", 1, rows.size());
    assertEquals("Should match expected record", expected, rows.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) LongWritable(org.apache.hadoop.io.LongWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) Test(org.junit.Test)

Example 15 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

the class TestParquetSerDe method deserializeAndSerializeLazySimple.

/**
 * Round-trips {@code t} through {@code serDe} and checks both directions.
 *
 * <p>After deserializing, asserts that the row is an {@link ArrayWritable},
 * that the SerDe's reported raw data size equals the number of elements in
 * {@code t}, and that the row equals {@code t}. After serializing the row
 * back, asserts the same size relationship for the serialized object and that
 * it is equal to {@code t} according to {@code arrayWritableEquals}.
 *
 * <p>All {@code assertEquals} calls pass the reference value first and the
 * observed value second, so a failure report labels "expected" and "actual"
 * correctly.
 *
 * @param serDe the SerDe under test
 * @param t the row to deserialize and then serialize back
 * @throws SerDeException if deserialization or serialization fails
 */
private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException {
    // Get the row structure
    final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();

    // Deserialize
    final Object row = serDe.deserialize(t);
    assertEquals("deserialization gives the wrong object class", ArrayWritable.class, row.getClass());
    // Read the stats right after deserialize(); they are read again after serialize() below.
    final long sizeAfterDeserialize = serDe.getSerDeStats().getRawDataSize();
    assertEquals("size correct after deserialization", t.get().length, sizeAfterDeserialize);
    assertEquals("deserialization gives the wrong object", t, row);

    // Serialize
    final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
    final long sizeAfterSerialize = serDe.getSerDeStats().getRawDataSize();
    final ArrayWritable serializedRow = (ArrayWritable) serializedArr.getObject();
    assertEquals("size correct after serialization", serializedRow.get().length, sizeAfterSerialize);
    assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, serializedRow));
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) ParquetHiveRecord(org.apache.hadoop.hive.serde2.io.ParquetHiveRecord) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

ArrayWritable (org.apache.hadoop.io.ArrayWritable) 72, Test (org.junit.Test) 41, IntWritable (org.apache.hadoop.io.IntWritable) 31, Writable (org.apache.hadoop.io.Writable) 29, Path (org.apache.hadoop.fs.Path) 18, DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 18, LongWritable (org.apache.hadoop.io.LongWritable) 18, RecordConsumer (org.apache.parquet.io.api.RecordConsumer) 18, ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) 15, ArrayList (java.util.ArrayList) 13, BytesWritable (org.apache.hadoop.io.BytesWritable) 10, List (java.util.List) 9, BooleanWritable (org.apache.hadoop.io.BooleanWritable) 8, FloatWritable (org.apache.hadoop.io.FloatWritable) 8, StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 6, NullWritable (org.apache.hadoop.io.NullWritable) 6, Text (org.apache.hadoop.io.Text) 6, ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 5, TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable) 5, PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 5