Search in sources :

Example 21 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

The class TestDataWritableWriter defines the method testTimestampInt96, shown below.

@Test
public void testTimestampInt96() throws Exception {
    String columnNames = "ts";
    String columnTypes = "timestamp";
    String fileSchema = "message hive_schema {\n" + "  optional int96 ts;\n" + "}\n";
    ArrayWritable hiveRecord = createGroup(createTimestamp(Timestamp.valueOf("2016-01-01 01:01:01")));
    // Write and verify the same record once per timezone; the emitted int96
    // nanotime must match the conversion done with a Calendar in that zone.
    for (String zoneId : new String[] { "CST", "PST" }) {
        TimeZone zone = TimeZone.getTimeZone(zoneId);
        // Write record to Parquet format using the current timezone
        writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord), zone);
        // Verify record was written correctly to Parquet
        startMessage();
        startField("ts", 0);
        addBinary(NanoTimeUtils.getNanoTime(Timestamp.valueOf("2016-01-01 01:01:01"), Calendar.getInstance(zone)).toBinary());
        endField("ts", 0);
        endMessage();
    }
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) Test(org.junit.Test)

Example 22 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

The class TestDataWritableWriter defines the method testSimpleType, shown below.

@Test
public void testSimpleType() throws Exception {
    // Eight Hive primitive columns, each mapped onto its Parquet physical type.
    String columnNames = "int,double,boolean,float,string,tinyint,smallint,bigint";
    String columnTypes = "int,double,boolean,float,string,tinyint,smallint,bigint";
    String fileSchema = String.join("\n",
        "message hive_schema {",
        "  optional int32 int;",
        "  optional double double;",
        "  optional boolean boolean;",
        "  optional float float;",
        "  optional binary string (UTF8);",
        "  optional int32 tinyint;",
        "  optional int32 smallint;",
        "  optional int64 bigint;",
        "}") + "\n";
    ArrayWritable row = createGroup(createInt(1), createDouble(1.0), createBoolean(true), createFloat(1.0f), createString("one"), createTinyInt((byte) 1), createSmallInt((short) 1), createBigInt((long) 1));
    // Write record to Parquet format
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, row));
    // Verify every field was emitted, in schema order, with the right consumer call.
    startMessage();
    startField("int", 0);
    addInteger(1);
    endField("int", 0);
    startField("double", 1);
    addDouble(1.0);
    endField("double", 1);
    startField("boolean", 2);
    addBoolean(true);
    endField("boolean", 2);
    startField("float", 3);
    addFloat(1.0f);
    endField("float", 3);
    startField("string", 4);
    addString("one");
    endField("string", 4);
    // tinyint and smallint are widened to int32 on the Parquet side.
    startField("tinyint", 5);
    addInteger(1);
    endField("tinyint", 5);
    startField("smallint", 6);
    addInteger(1);
    endField("smallint", 6);
    startField("bigint", 7);
    addLong(1);
    endField("bigint", 7);
    endMessage();
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) Test(org.junit.Test)

Example 23 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

The class TestDataWritableWriter defines the method testExpectedArrayTypeOnRecord, shown below.

@Test
public void testExpectedArrayTypeOnRecord() throws Exception {
    // The schema declares a LIST group, but the Hive record supplies a
    // primitive int — the writer must reject the mismatch.
    String columnNames = "arrayCol";
    String columnTypes = "int";
    ArrayWritable mismatchedRecord = createGroup(createInt(1));
    String fileSchema = String.join("\n",
        "message hive_schema {",
        "  optional group arrayCol (LIST) {",
        "    repeated group bag {",
        "      optional int32 array_element;",
        "    }",
        "  }",
        "}") + "\n";
    try {
        writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, mismatchedRecord));
        fail();
    } catch (RuntimeException e) {
        // The full message is part of the writer's error contract.
        assertEquals("Parquet record is malformed: Invalid data type: expected LIST type, but found: PRIMITIVE", e.getMessage());
    }
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) Test(org.junit.Test)

Example 24 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

The class TestDataWritableWriter defines the method testStructType, shown below.

@Test
public void testStructType() throws Exception {
    // A single struct column becomes a nested Parquet group of three fields.
    String columnNames = "structCol";
    String columnTypes = "struct<a:int,b:double,c:boolean>";
    String fileSchema = String.join("\n",
        "message hive_schema {",
        "  optional group structCol {",
        "    optional int32 a;",
        "    optional double b;",
        "    optional boolean c;",
        "  }",
        "}") + "\n";
    ArrayWritable row = createGroup(createGroup(createInt(1), createDouble(1.0), createBoolean(true)));
    // Write record to Parquet format
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, row));
    // Verify the nested group is bracketed by startGroup/endGroup inside the column field.
    startMessage();
    startField("structCol", 0);
    startGroup();
    startField("a", 0);
    addInteger(1);
    endField("a", 0);
    startField("b", 1);
    addDouble(1.0);
    endField("b", 1);
    startField("c", 2);
    addBoolean(true);
    endField("c", 2);
    endGroup();
    endField("structCol", 0);
    endMessage();
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) Test(org.junit.Test)

Example 25 with ArrayWritable

use of org.apache.hadoop.io.ArrayWritable in project hive by apache.

The class TestDataWritableWriter defines the method testExpectedMapTypeOnRecord, shown below.

@Test
public void testExpectedMapTypeOnRecord() throws Exception {
    // The schema declares a MAP group, but the Hive record supplies a
    // primitive int — the writer must reject the mismatch.
    String columnNames = "mapCol";
    String columnTypes = "int";
    ArrayWritable mismatchedRecord = createGroup(createInt(1));
    String fileSchema = String.join("\n",
        "message hive_schema {",
        "  optional group mapCol (MAP) {",
        "    repeated group map (MAP_KEY_VALUE) {",
        "      required binary key;",
        "      optional int32 value;",
        "    }",
        "  }",
        "}") + "\n";
    try {
        writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, mismatchedRecord));
        fail();
    } catch (RuntimeException e) {
        // The full message is part of the writer's error contract.
        assertEquals("Parquet record is malformed: Invalid data type: expected MAP type, but found: PRIMITIVE", e.getMessage());
    }
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) Test(org.junit.Test)

Aggregations

ArrayWritable (org.apache.hadoop.io.ArrayWritable)72 Test (org.junit.Test)41 IntWritable (org.apache.hadoop.io.IntWritable)31 Writable (org.apache.hadoop.io.Writable)29 Path (org.apache.hadoop.fs.Path)18 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)18 LongWritable (org.apache.hadoop.io.LongWritable)18 RecordConsumer (org.apache.parquet.io.api.RecordConsumer)18 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)15 ArrayList (java.util.ArrayList)13 BytesWritable (org.apache.hadoop.io.BytesWritable)10 List (java.util.List)9 BooleanWritable (org.apache.hadoop.io.BooleanWritable)8 FloatWritable (org.apache.hadoop.io.FloatWritable)8 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)6 NullWritable (org.apache.hadoop.io.NullWritable)6 Text (org.apache.hadoop.io.Text)6 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)5 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)5 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)5