
Example 16 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestMapStructures, method testDoubleMapWithStructValue.

@Test
public void testDoubleMapWithStructValue() throws Exception {
    Path test = writeDirect("DoubleMapWithStructValue", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().optional(DOUBLE).named("key").optionalGroup().required(INT32).named("x").required(INT32).named("y").named("value").named("key_value").named("approx").named("DoubleMapWithStructValue"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("approx", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addDouble(3.14);
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("x", 0);
            rc.addInteger(7);
            rc.endField("x", 0);
            rc.startField("y", 1);
            rc.addInteger(22);
            rc.endField("y", 1);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("approx", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new DoubleWritable(3.14), record(new IntWritable(7), new IntWritable(22))));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("approx"), Arrays.asList("map<bigint,struct<x:int,y:int>>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
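
The record and list helpers used to build the expected value are inherited from the test's base class and are not shown by the extractor. A minimal sketch of what they plausibly do, assuming a Hive struct is represented as an ArrayWritable of its field values and a collection gets one extra wrapping layer (an assumption, not the verbatim Hive helpers):

// Sketch only: assumed semantics of the inherited test helpers.
public static ArrayWritable record(Writable... fields) {
    // A struct is an ArrayWritable holding its field values in order.
    return new ArrayWritable(Writable.class, fields);
}

public static ArrayWritable list(Writable... elements) {
    // Collections are wrapped in one extra ArrayWritable layer so they
    // can be told apart from a plain struct.
    return record(new ArrayWritable(Writable.class, elements));
}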

Example 17 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestMapStructures, method testStringMapOptionalPrimitive.

@Test
public void testStringMapOptionalPrimitive() throws Exception {
    Path test = writeDirect("StringMapOptionalPrimitive", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().required(BINARY).as(UTF8).named("key").optional(INT32).named("value").named("key_value").named("votes").named("StringMapOptionalPrimitive"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("votes", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("lettuce"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(34);
            rc.endField("value", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("kale"));
            rc.endField("key", 0);
            // no value for kale
            rc.endGroup();
            rc.startGroup();
            rc.startField("key", 0);
            rc.addBinary(Binary.fromString("cabbage"));
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.addInteger(18);
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("votes", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(
        record(new Text("lettuce"), new IntWritable(34)),
        record(new Text("kale"), null),
        record(new Text("cabbage"), new IntWritable(18)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("votes"), Arrays.asList("map<string,int>"));
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayWritable (org.apache.hadoop.io.ArrayWritable), Text (org.apache.hadoop.io.Text), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
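
For orientation, the fluent Types builder in this test corresponds to roughly the following Parquet message schema, in the same notation the later examples embed as fileSchema strings (a sketch derived from the builder calls above):

message StringMapOptionalPrimitive {
  optional group votes (MAP) {
    repeated group key_value {
      required binary key (UTF8);
      optional int32 value;
    }
  }
}

The optional int32 value is what allows the "kale" entry to be written with no value; it comes back as null in the expected record.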

Example 18 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestParquetSerDe, method testParquetHiveSerDe.

public void testParquetHiveSerDe() throws Throwable {
    try {
        // Create the SerDe
        System.out.println("test: testParquetHiveSerDe");
        final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
        final Configuration conf = new Configuration();
        final Properties tbl = createProperties();
        SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
        // Data
        final Writable[] arr = new Writable[9];
        // primitive types
        arr[0] = new ByteWritable((byte) 123);
        arr[1] = new ShortWritable((short) 456);
        arr[2] = new IntWritable(789);
        arr[3] = new LongWritable(1000L);
        arr[4] = new DoubleWritable(5.3);
        arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
        arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));
        final Writable[] map = new Writable[3];
        for (int i = 0; i < 3; ++i) {
            final Writable[] pair = new Writable[2];
            pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
            pair[1] = new IntWritable(i);
            map[i] = new ArrayWritable(Writable.class, pair);
        }
        arr[7] = new ArrayWritable(Writable.class, map);
        final Writable[] array = new Writable[5];
        for (int i = 0; i < 5; ++i) {
            array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
        }
        arr[8] = new ArrayWritable(Writable.class, array);
        final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
        // Test
        deserializeAndSerializeLazySimple(serDe, arrWritable);
        System.out.println("test: testParquetHiveSerDe - OK");
    } catch (final Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: ParquetHiveSerDe (org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe), Configuration (org.apache.hadoop.conf.Configuration), Properties (java.util.Properties), Writable (org.apache.hadoop.io.Writable), ArrayWritable (org.apache.hadoop.io.ArrayWritable), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), IntWritable (org.apache.hadoop.io.IntWritable), LongWritable (org.apache.hadoop.io.LongWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), BytesWritable (org.apache.hadoop.io.BytesWritable)
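
The createProperties() helper is not shown. A hypothetical sketch consistent with the nine-element Writable array above; the column names are invented for illustration, while the types follow the order in which arr[] is populated:

// Hypothetical: column names are illustrative, types mirror arr[0]..arr[8].
private Properties createProperties() {
    final Properties tbl = new Properties();
    tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,abinary,amap,alist");
    tbl.setProperty("columns.types",
        "tinyint:smallint:int:bigint:double:string:binary:map<string,int>:array<string>");
    return tbl;
}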

Example 19 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestDataWritableWriter, method testArrayOfArrays.

@Test
public void testArrayOfArrays() throws Exception {
    String columnNames = "array_of_arrays";
    String columnTypes = "array<array<int>>";
    String fileSchema = "message hive_schema {\n"
        + "  optional group array_of_arrays (LIST) {\n"
        + "    repeated group array {\n"
        + "      optional group array_element (LIST) {\n"
        + "        repeated group array {\n"
        + "          optional int32 array_element;\n"
        + "        }\n"
        + "      }\n"
        + "    }\n"
        + "  }\n"
        + "}\n";
    ArrayWritable hiveRecord = createGroup(createArray(createArray(createInt(1), createInt(2))));
    // Write record to Parquet format
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
    // Verify record was written correctly to Parquet
    startMessage();
    startField("array_of_arrays", 0);
    startGroup();
    startField("array", 0);
    startGroup();
    startField("array_element", 0);
    startGroup();
    startField("array", 0);
    startGroup();
    startField("array_element", 0);
    addInteger(1);
    endField("array_element", 0);
    endGroup();
    startGroup();
    startField("array_element", 0);
    addInteger(2);
    endField("array_element", 0);
    endGroup();
    endField("array", 0);
    endGroup();
    endField("array_element", 0);
    endGroup();
    endField("array", 0);
    endGroup();
    endField("array_of_arrays", 0);
    endMessage();
}
Also used: ArrayWritable (org.apache.hadoop.io.ArrayWritable), Test (org.junit.Test)
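
The createInt, createGroup, and createArray helpers (and createNull, used in the next example) are private to TestDataWritableWriter and omitted by the extractor. A plausible reconstruction, assuming the same ArrayWritable layout as the earlier examples (a sketch, not the verbatim helpers):

// Sketch only: assumed semantics of the private test helpers.
private IntWritable createInt(int value) {
    return new IntWritable(value);
}

private Writable createNull() {
    return null;
}

// A struct (group) is an ArrayWritable of its field values.
private ArrayWritable createGroup(Writable... values) {
    return new ArrayWritable(Writable.class, values);
}

// A Hive LIST adds one extra wrapping layer around its elements.
private ArrayWritable createArray(Writable... values) {
    return new ArrayWritable(Writable.class,
        new Writable[] { createGroup(values) });
}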

Example 20 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

From the class TestDataWritableWriter, method testArrayType.

@Test
public void testArrayType() throws Exception {
    String columnNames = "arrayCol";
    String columnTypes = "array<int>";
    String fileSchema = "message hive_schema {\n"
        + "  optional group arrayCol (LIST) {\n"
        + "    repeated group array {\n"
        + "      optional int32 array_element;\n"
        + "    }\n"
        + "  }\n"
        + "}\n";
    ArrayWritable hiveRecord = createGroup(createArray(createInt(1), createNull(), createInt(2)));
    // Write record to Parquet format
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
    // Verify record was written correctly to Parquet
    startMessage();
    startField("arrayCol", 0);
    startGroup();
    startField("array", 0);
    startGroup();
    startField("array_element", 0);
    addInteger(1);
    endField("array_element", 0);
    endGroup();
    startGroup();
    endGroup();
    startGroup();
    startField("array_element", 0);
    addInteger(2);
    endField("array_element", 0);
    endGroup();
    endField("array", 0);
    endGroup();
    endField("arrayCol", 0);
    endMessage();
}
Also used: ArrayWritable (org.apache.hadoop.io.ArrayWritable), Test (org.junit.Test)
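
Note how the null element from createNull() surfaces in the expected write sequence: the middle repeated group is opened and closed with no array_element field inside (the bare startGroup()/endGroup() pair), which is how this legacy two-level LIST layout encodes a null. Under the helper sketch after Example 19, the record under test would expand to roughly:

// Hypothetical direct construction of arrayCol = [1, null, 2]:
ArrayWritable hiveRecord = new ArrayWritable(Writable.class, new Writable[] {
    new ArrayWritable(Writable.class, new Writable[] {
        new ArrayWritable(Writable.class,
            new Writable[] { new IntWritable(1), null, new IntWritable(2) })
    })
});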

Aggregations

ArrayWritable (org.apache.hadoop.io.ArrayWritable): 72
Test (org.junit.Test): 41
IntWritable (org.apache.hadoop.io.IntWritable): 31
Writable (org.apache.hadoop.io.Writable): 29
Path (org.apache.hadoop.fs.Path): 18
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 18
LongWritable (org.apache.hadoop.io.LongWritable): 18
RecordConsumer (org.apache.parquet.io.api.RecordConsumer): 18
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 15
ArrayList (java.util.ArrayList): 13
BytesWritable (org.apache.hadoop.io.BytesWritable): 10
List (java.util.List): 9
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 8
FloatWritable (org.apache.hadoop.io.FloatWritable): 8
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 6
NullWritable (org.apache.hadoop.io.NullWritable): 6
Text (org.apache.hadoop.io.Text): 6
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 5
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 5
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 5