
Example 46 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project.

From the class TestArrayCompatibility, method testMultiFieldGroupInList.

@Test
public void testMultiFieldGroupInList() throws Exception {
    // tests the missing element layer, detected by a multi-field group
    Path test = writeDirect("MultiFieldGroupInList", Types.buildMessage()
        .optionalGroup().as(LIST)
            .repeatedGroup()
                .required(DOUBLE).named("latitude")
                .required(DOUBLE).named("longitude")
            .named("element")  // should not affect schema conversion
        .named("locations")
        .named("MultiFieldGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("locations", 0);
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(0.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(180.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            rc.endGroup();
            rc.endField("locations", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new DoubleWritable(0.0), new DoubleWritable(0.0)), record(new DoubleWritable(0.0), new DoubleWritable(180.0)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) Test(org.junit.Test)
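The expected value is assembled with the record(...) and list(...) helpers that the test class inherits from its shared Parquet test harness. As a minimal sketch, assuming both helpers simply wrap their arguments in an ArrayWritable (the actual helpers in the Hive source may differ), they could look like this:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

// Hypothetical helpers: a record is an ArrayWritable with one Writable per
// struct field, and a list is an ArrayWritable with one Writable per element.
public static ArrayWritable record(Writable... fields) {
    return new ArrayWritable(Writable.class, fields);
}

public static ArrayWritable list(Writable... elements) {
    return new ArrayWritable(Writable.class, elements);
}

Under that assumption, the expected value above is one ArrayWritable (the list) whose entries are two ArrayWritable records of (latitude, longitude) doubles.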

Example 47 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project.

From the class TestArrayCompatibility, method testHiveRequiredGroupInList.

@Test
public void testHiveRequiredGroupInList() throws Exception {
    // this matches the list structure that Hive writes
    Path test = writeDirect("HiveRequiredGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().requiredGroup().required(DOUBLE).named("latitude").required(DOUBLE).named("longitude").named("element").named("bag").named("locations").named("HiveRequiredGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("locations", 0);
            rc.startGroup();
            // start writing array contents
            rc.startField("bag", 0);
            // write a non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(180.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // write a second non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(0.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // finished writing array contents
            rc.endField("bag", 0);
            rc.endGroup();
            rc.endField("locations", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new DoubleWritable(0.0), new DoubleWritable(180.0)), record(new DoubleWritable(0.0), new DoubleWritable(0.0)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) Test(org.junit.Test)
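Spelled out, the Types builder above corresponds roughly to the following Parquet message, with the repeated level named bag as Hive historically writes it:

message HiveRequiredGroupInList {
  optional group locations (LIST) {
    repeated group bag {
      required group element {
        required double latitude;
        required double longitude;
      }
    }
  }
}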

Example 48 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project.

From the class TestArrayCompatibility, method testNewRequiredGroupInList.

@Test
public void testNewRequiredGroupInList() throws Exception {
    Path test = writeDirect("NewRequiredGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().requiredGroup().required(DOUBLE).named("latitude").required(DOUBLE).named("longitude").named("element").named("list").named("locations").named("NewRequiredGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("locations", 0);
            rc.startGroup();
            // start writing array contents
            rc.startField("list", 0);
            // write a non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(180.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // write a second non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(0.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // finished writing array contents
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("locations", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new DoubleWritable(0.0), new DoubleWritable(180.0)), record(new DoubleWritable(0.0), new DoubleWritable(0.0)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) Test(org.junit.Test)
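The only structural difference from the previous example is the name of the repeated level: the builder names it list instead of bag, so the schema fragment becomes

      repeated group list {
        required group element { ... }
      }

which matches the standard three-level Parquet list layout. Since both layouts decode to the same ArrayWritable on the Hive side, the expected value and assertions are identical to the previous test.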

Example 49 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project.

From the class TestDataWritableWriter, method testMapType.

@Test
public void testMapType() throws Exception {
    String columnNames = "mapCol";
    String columnTypes = "map<string,int>";
    String fileSchema = "message hive_schema {\n" + "  optional group mapCol (MAP) {\n" + "    repeated group map (MAP_KEY_VALUE) {\n" + "      required binary key;\n" + "      optional int32 value;\n" + "    }\n" + "  }\n" + "}\n";
    ArrayWritable hiveRecord = createGroup(createGroup(createArray(createString("key1"), createInt(1)), createArray(createString("key2"), createInt(2)), createArray(createString("key3"), createNull())));
    // Write record to Parquet format
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
    // Verify record was written correctly to Parquet
    startMessage();
    startField("mapCol", 0);
    startGroup();
    startField("map", 0);
    startGroup();
    startField("key", 0);
    addString("key1");
    endField("key", 0);
    startField("value", 1);
    addInteger(1);
    endField("value", 1);
    endGroup();
    startGroup();
    startField("key", 0);
    addString("key2");
    endField("key", 0);
    startField("value", 1);
    addInteger(2);
    endField("value", 1);
    endGroup();
    startGroup();
    startField("key", 0);
    addString("key3");
    endField("key", 0);
    endGroup();
    endField("map", 0);
    endGroup();
    endField("mapCol", 0);
    endMessage();
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) Test(org.junit.Test)
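The Hive-side record in testMapType is built with createGroup, createArray, createString, createInt, and createNull helpers defined in the test class. A minimal sketch of plausible implementations, assuming groups and map entries are both carried as ArrayWritable and strings as UTF-8 BytesWritable (the exact helpers in the Hive source may differ):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

// Hypothetical helpers: structs, maps, and map entries are all represented
// as ArrayWritable; a null value is simply a null Writable reference.
private static ArrayWritable createGroup(Writable... values) {
    return new ArrayWritable(Writable.class, values);
}

private static ArrayWritable createArray(Writable... values) {
    return new ArrayWritable(Writable.class, values);
}

private static BytesWritable createString(String value) {
    return new BytesWritable(value.getBytes(StandardCharsets.UTF_8));
}

private static IntWritable createInt(int value) {
    return new IntWritable(value);
}

private static Writable createNull() {
    return null;
}

With these, hiveRecord is a map column holding three key/value pairs, the last of which ("key3") has a null value, which is why the verification block writes no value field for the third map entry.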

Example 50 with ArrayWritable

Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project.

From the class TestDataWritableWriter, method testExpectedStructTypeOnRecord.

@Test
public void testExpectedStructTypeOnRecord() throws Exception {
    String columnNames = "structCol";
    String columnTypes = "int";
    ArrayWritable hiveRecord = createGroup(createInt(1));
    String fileSchema = "message hive_schema {\n"
        + "  optional group structCol {\n"
        + "      optional int32 int;\n"
        + "    }\n"
        + "}\n";
    try {
        writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
        fail();
    } catch (RuntimeException e) {
        assertEquals("Parquet record is malformed: Invalid data type: expected STRUCT type, but found: PRIMITIVE", e.getMessage());
    }
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) Test(org.junit.Test)
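The failure is intentional: the column is declared with the primitive Hive type int while both the Parquet schema and the ArrayWritable describe a group, so the writer rejects the record. As a purely hypothetical sketch (not taken from the Hive tests), a consistent declaration for the same data could pair a struct column type with a matching group schema:

// Hypothetical consistent declaration: the Hive column type is a struct whose
// single field mirrors the int32 inside the Parquet group (field renamed to "a"
// here for readability).
String columnNames = "structCol";
String columnTypes = "struct<a:int>";
String fileSchema = "message hive_schema {\n"
    + "  optional group structCol {\n"
    + "    optional int32 a;\n"
    + "  }\n"
    + "}\n";
ArrayWritable hiveRecord = createGroup(createInt(1));
// With matching types, writeParquetRecord(fileSchema,
//     getParquetWritable(columnNames, columnTypes, hiveRecord)) is expected to succeed.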

Aggregations

ArrayWritable (org.apache.hadoop.io.ArrayWritable) 72
Test (org.junit.Test) 41
IntWritable (org.apache.hadoop.io.IntWritable) 31
Writable (org.apache.hadoop.io.Writable) 29
Path (org.apache.hadoop.fs.Path) 18
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 18
LongWritable (org.apache.hadoop.io.LongWritable) 18
RecordConsumer (org.apache.parquet.io.api.RecordConsumer) 18
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) 15
ArrayList (java.util.ArrayList) 13
BytesWritable (org.apache.hadoop.io.BytesWritable) 10
List (java.util.List) 9
BooleanWritable (org.apache.hadoop.io.BooleanWritable) 8
FloatWritable (org.apache.hadoop.io.FloatWritable) 8
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 6
NullWritable (org.apache.hadoop.io.NullWritable) 6
Text (org.apache.hadoop.io.Text) 6
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 5
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 5
MapWritable (org.apache.hadoop.io.MapWritable) 5