Search in sources :

Example 21 with ShortObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector in project hive by apache.

the class BinarySortableSerDe method serialize.

static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, boolean invert, byte nullMarker, byte notNullMarker) throws SerDeException {
    // Is this field a null?
    if (o == null) {
        writeByte(buffer, nullMarker, invert);
        return;
    }
    // This field is not a null.
    writeByte(buffer, notNullMarker, invert);
    switch(oi.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
                switch(poi.getPrimitiveCategory()) {
                    case VOID:
                        {
                            return;
                        }
                    case BOOLEAN:
                        {
                            boolean v = ((BooleanObjectInspector) poi).get(o);
                            writeByte(buffer, (byte) (v ? 2 : 1), invert);
                            return;
                        }
                    case BYTE:
                        {
                            ByteObjectInspector boi = (ByteObjectInspector) poi;
                            byte v = boi.get(o);
                            writeByte(buffer, (byte) (v ^ 0x80), invert);
                            return;
                        }
                    case SHORT:
                        {
                            ShortObjectInspector spoi = (ShortObjectInspector) poi;
                            short v = spoi.get(o);
                            serializeShort(buffer, v, invert);
                            return;
                        }
                    case INT:
                        {
                            IntObjectInspector ioi = (IntObjectInspector) poi;
                            int v = ioi.get(o);
                            serializeInt(buffer, v, invert);
                            return;
                        }
                    case LONG:
                        {
                            LongObjectInspector loi = (LongObjectInspector) poi;
                            long v = loi.get(o);
                            serializeLong(buffer, v, invert);
                            return;
                        }
                    case FLOAT:
                        {
                            FloatObjectInspector foi = (FloatObjectInspector) poi;
                            serializeFloat(buffer, foi.get(o), invert);
                            return;
                        }
                    case DOUBLE:
                        {
                            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                            serializeDouble(buffer, doi.get(o), invert);
                            return;
                        }
                    case STRING:
                        {
                            StringObjectInspector soi = (StringObjectInspector) poi;
                            Text t = soi.getPrimitiveWritableObject(o);
                            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                            return;
                        }
                    case CHAR:
                        {
                            HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
                            HiveCharWritable hc = hcoi.getPrimitiveWritableObject(o);
                            // Trailing space should ignored for char comparisons.
                            // So write stripped values for this SerDe.
                            Text t = hc.getStrippedValue();
                            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                            return;
                        }
                    case VARCHAR:
                        {
                            HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
                            HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
                            // use varchar's text field directly
                            Text t = hc.getTextValue();
                            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                            return;
                        }
                    case BINARY:
                        {
                            BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
                            BytesWritable ba = baoi.getPrimitiveWritableObject(o);
                            byte[] toSer = new byte[ba.getLength()];
                            System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength());
                            serializeBytes(buffer, toSer, ba.getLength(), invert);
                            return;
                        }
                    case DATE:
                        {
                            DateObjectInspector doi = (DateObjectInspector) poi;
                            int v = doi.getPrimitiveWritableObject(o).getDays();
                            serializeInt(buffer, v, invert);
                            return;
                        }
                    case TIMESTAMP:
                        {
                            TimestampObjectInspector toi = (TimestampObjectInspector) poi;
                            TimestampWritableV2 t = toi.getPrimitiveWritableObject(o);
                            serializeTimestampWritable(buffer, t, invert);
                            return;
                        }
                    case TIMESTAMPLOCALTZ:
                        {
                            TimestampLocalTZObjectInspector toi = (TimestampLocalTZObjectInspector) poi;
                            TimestampLocalTZWritable t = toi.getPrimitiveWritableObject(o);
                            serializeTimestampTZWritable(buffer, t, invert);
                            return;
                        }
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonthObjectInspector ioi = (HiveIntervalYearMonthObjectInspector) poi;
                            HiveIntervalYearMonth intervalYearMonth = ioi.getPrimitiveJavaObject(o);
                            serializeHiveIntervalYearMonth(buffer, intervalYearMonth, invert);
                            return;
                        }
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTimeObjectInspector ioi = (HiveIntervalDayTimeObjectInspector) poi;
                            HiveIntervalDayTime intervalDayTime = ioi.getPrimitiveJavaObject(o);
                            serializeHiveIntervalDayTime(buffer, intervalDayTime, invert);
                            return;
                        }
                    case DECIMAL:
                        {
                            HiveDecimalObjectInspector boi = (HiveDecimalObjectInspector) poi;
                            HiveDecimal dec = boi.getPrimitiveJavaObject(o);
                            serializeHiveDecimal(buffer, dec, invert);
                            return;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
            {
                ListObjectInspector loi = (ListObjectInspector) oi;
                ObjectInspector eoi = loi.getListElementObjectInspector();
                // \1 followed by each element
                int size = loi.getListLength(o);
                for (int eid = 0; eid < size; eid++) {
                    writeByte(buffer, (byte) 1, invert);
                    serialize(buffer, loi.getListElement(o, eid), eoi, invert, nullMarker, notNullMarker);
                }
                // and \0 to terminate
                writeByte(buffer, (byte) 0, invert);
                return;
            }
        case MAP:
            {
                MapObjectInspector moi = (MapObjectInspector) oi;
                ObjectInspector koi = moi.getMapKeyObjectInspector();
                ObjectInspector voi = moi.getMapValueObjectInspector();
                // \1 followed by each key and then each value
                Map<?, ?> map = moi.getMap(o);
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    writeByte(buffer, (byte) 1, invert);
                    serialize(buffer, entry.getKey(), koi, invert, nullMarker, notNullMarker);
                    serialize(buffer, entry.getValue(), voi, invert, nullMarker, notNullMarker);
                }
                // and \0 to terminate
                writeByte(buffer, (byte) 0, invert);
                return;
            }
        case STRUCT:
            {
                StructObjectInspector soi = (StructObjectInspector) oi;
                List<? extends StructField> fields = soi.getAllStructFieldRefs();
                for (int i = 0; i < fields.size(); i++) {
                    serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(), invert, nullMarker, notNullMarker);
                }
                return;
            }
        case UNION:
            {
                UnionObjectInspector uoi = (UnionObjectInspector) oi;
                byte tag = uoi.getTag(o);
                writeByte(buffer, tag, invert);
                serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert, nullMarker, notNullMarker);
                return;
            }
        default:
            {
                throw new RuntimeException("Unrecognized type: " + oi.getCategory());
            }
    }
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) List(java.util.List) ArrayList(java.util.ArrayList) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 22 with ShortObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector in project hive by apache.

the class TestOrcFile method test1.

@Test
public void test1() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
    writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536, Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi", new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(inner(3, "good"), inner(4, "bad")), map()));
    writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536, Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye", new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")), map(inner(5, "chani"), inner(1, "mauddib"))));
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    TypeDescription schema = writer.getSchema();
    assertEquals(23, schema.getMaximumId());
    boolean[] expected = new boolean[] { false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false };
    boolean[] included = OrcUtils.includeColumns("", schema);
    assertEquals(true, Arrays.equals(expected, included));
    expected = new boolean[] { false, true, false, false, false, false, false, false, false, true, true, true, true, true, true, false, false, false, false, true, true, true, true, true };
    included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
    assertEquals(true, Arrays.equals(expected, included));
    expected = new boolean[] { false, true, false, false, false, false, false, false, false, true, true, true, true, true, true, false, false, false, false, true, true, true, true, true };
    included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
    assertEquals(true, Arrays.equals(expected, included));
    expected = new boolean[] { false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true };
    included = OrcUtils.includeColumns("boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", schema);
    assertEquals(true, Arrays.equals(expected, included));
    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(2, stats[1].getNumberOfValues());
    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
    assertEquals("count: 2 hasNull: false bytesOnDisk: 5 true: 1", stats[1].toString());
    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
    assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
    assertEquals("count: 2 hasNull: false bytesOnDisk: 9 min: 1024 max: 2048 sum: 3072", stats[3].toString());
    StripeStatistics ss = reader.getStripeStatistics().get(0);
    assertEquals(2, ss.getColumnStatistics()[0].getNumberOfValues());
    assertEquals(1, ((BooleanColumnStatistics) ss.getColumnStatistics()[1]).getTrueCount());
    assertEquals(1024, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMinimum());
    assertEquals(2048, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMaximum());
    assertEquals(3072, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getSum());
    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
    assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
    assertEquals("count: 2 hasNull: false bytesOnDisk: 15 min: -15.0 max: -5.0 sum: -20.0", stats[7].toString());
    assertEquals("count: 2 hasNull: false bytesOnDisk: 14 min: bye max: hi sum: 5", stats[9].toString());
    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint," + "int1:int,long1:bigint,float1:float,double1:double,bytes1:" + "binary,string1:string,middle:struct<list:array<struct<int1:int," + "string1:string>>>,list:array<struct<int1:int,string1:string>>," + "map:map<string,struct<int1:int,string1:string>>>", readerInspector.getTypeName());
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.getStructFieldRef("boolean1").getFieldObjectInspector();
    ByteObjectInspector by = (ByteObjectInspector) readerInspector.getStructFieldRef("byte1").getFieldObjectInspector();
    ShortObjectInspector sh = (ShortObjectInspector) readerInspector.getStructFieldRef("short1").getFieldObjectInspector();
    IntObjectInspector in = (IntObjectInspector) readerInspector.getStructFieldRef("int1").getFieldObjectInspector();
    LongObjectInspector lo = (LongObjectInspector) readerInspector.getStructFieldRef("long1").getFieldObjectInspector();
    FloatObjectInspector fl = (FloatObjectInspector) readerInspector.getStructFieldRef("float1").getFieldObjectInspector();
    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.getStructFieldRef("double1").getFieldObjectInspector();
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector) readerInspector.getStructFieldRef("string1").getFieldObjectInspector();
    StructObjectInspector mid = (StructObjectInspector) readerInspector.getStructFieldRef("middle").getFieldObjectInspector();
    List<? extends StructField> midFields = mid.getAllStructFieldRefs();
    ListObjectInspector midli = (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
    StructObjectInspector inner = (StructObjectInspector) midli.getListElementObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    ListObjectInspector li = (ListObjectInspector) readerInspector.getStructFieldRef("list").getFieldObjectInspector();
    MapObjectInspector ma = (MapObjectInspector) readerInspector.getStructFieldRef("map").getFieldObjectInspector();
    StringObjectInspector mk = (StringObjectInspector) ma.getMapKeyObjectInspector();
    RecordReader rows = reader.rows();
    Object row = rows.next(null);
    assertNotNull(row);
    // check the contents of the first row
    assertEquals(false, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(1, by.get(readerInspector.getStructFieldData(row, fields.get(1))));
    assertEquals(1024, sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row, fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
    assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
    assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
    assertEquals(bytes(0, 1, 2, 3, 4), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
    List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(0), inFields.get(1))));
    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(1), inFields.get(1))));
    List<?> list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
    assertEquals(2, list.size());
    assertEquals(3, in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
    assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(0), inFields.get(1))));
    assertEquals(4, in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
    assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(1), inFields.get(1))));
    Map<?, ?> map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
    assertEquals(0, map.size());
    // check the contents of second row
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertEquals(true, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(100, by.get(readerInspector.getStructFieldData(row, fields.get(1))));
    assertEquals(2048, sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row, fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
    assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
    assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
    assertEquals(bytes(), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
    midRow = midli.getList(mid.getStructFieldData(readerInspector.getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(0), inFields.get(1))));
    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(1), inFields.get(1))));
    list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
    assertEquals(3, list.size());
    assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
    assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(0), inFields.get(1))));
    assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
    assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(1), inFields.get(1))));
    assertEquals(1234, in.get(inner.getStructFieldData(list.get(2), inFields.get(0))));
    assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(2), inFields.get(1))));
    map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
    assertEquals(2, map.size());
    boolean[] found = new boolean[2];
    for (Object key : map.keySet()) {
        String str = mk.getPrimitiveJavaObject(key);
        if (str.equals("chani")) {
            assertEquals(false, found[0]);
            assertEquals(5, in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
            assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(map.get(key), inFields.get(1))));
            found[0] = true;
        } else if (str.equals("mauddib")) {
            assertEquals(false, found[1]);
            assertEquals(1, in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
            assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(map.get(key), inFields.get(1))));
            found[1] = true;
        } else {
            throw new IllegalArgumentException("Unknown key " + str);
        }
    }
    assertEquals(true, found[0]);
    assertEquals(true, found[1]);
    // handle the close up
    assertEquals(false, rows.hasNext());
    rows.close();
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) TypeDescription(org.apache.orc.TypeDescription) DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) StringColumnStatistics(org.apache.orc.StringColumnStatistics) DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) ColumnStatistics(org.apache.orc.ColumnStatistics) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) StripeStatistics(org.apache.orc.StripeStatistics) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 23 with ShortObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector in project hive by apache.

the class TestOrcFile method testReadFormat_0_11.

@Test
public void testReadFormat_0_11() throws Exception {
    Path oldFilePath = new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
    Reader reader = OrcFile.createReader(oldFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    int stripeCount = 0;
    int rowCount = 0;
    long currentOffset = -1;
    for (StripeInformation stripe : reader.getStripes()) {
        stripeCount += 1;
        rowCount += stripe.getNumberOfRows();
        if (currentOffset < 0) {
            currentOffset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength() + stripe.getFooterLength();
        } else {
            assertEquals(currentOffset, stripe.getOffset());
            currentOffset += stripe.getIndexLength() + stripe.getDataLength() + stripe.getFooterLength();
        }
    }
    assertEquals(reader.getNumberOfRows(), rowCount);
    assertEquals(2, stripeCount);
    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(7500, stats[1].getNumberOfValues());
    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
    assertEquals("count: 7500 hasNull: true true: 3750", stats[1].toString());
    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
    assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
    assertEquals("count: 7500 hasNull: true min: 1024 max: 2048 sum: 11520000", stats[3].toString());
    assertEquals(Long.MAX_VALUE, ((IntegerColumnStatistics) stats[5]).getMaximum());
    assertEquals(Long.MAX_VALUE, ((IntegerColumnStatistics) stats[5]).getMinimum());
    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
    assertEquals("count: 7500 hasNull: true min: 9223372036854775807 max: 9223372036854775807", stats[5].toString());
    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
    assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
    assertEquals("count: 7500 hasNull: true min: -15.0 max: -5.0 sum: -75000.0", stats[7].toString());
    assertEquals("count: 7500 hasNull: true min: bye max: hi sum: 0", stats[9].toString());
    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint," + "int1:int,long1:bigint,float1:float,double1:double,bytes1:" + "binary,string1:string,middle:struct<list:array<struct<int1:int," + "string1:string>>>,list:array<struct<int1:int,string1:string>>," + "map:map<string,struct<int1:int,string1:string>>,ts:timestamp," + "decimal1:decimal(38,10)>", readerInspector.getTypeName());
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.getStructFieldRef("boolean1").getFieldObjectInspector();
    ByteObjectInspector by = (ByteObjectInspector) readerInspector.getStructFieldRef("byte1").getFieldObjectInspector();
    ShortObjectInspector sh = (ShortObjectInspector) readerInspector.getStructFieldRef("short1").getFieldObjectInspector();
    IntObjectInspector in = (IntObjectInspector) readerInspector.getStructFieldRef("int1").getFieldObjectInspector();
    LongObjectInspector lo = (LongObjectInspector) readerInspector.getStructFieldRef("long1").getFieldObjectInspector();
    FloatObjectInspector fl = (FloatObjectInspector) readerInspector.getStructFieldRef("float1").getFieldObjectInspector();
    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.getStructFieldRef("double1").getFieldObjectInspector();
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector) readerInspector.getStructFieldRef("string1").getFieldObjectInspector();
    StructObjectInspector mid = (StructObjectInspector) readerInspector.getStructFieldRef("middle").getFieldObjectInspector();
    List<? extends StructField> midFields = mid.getAllStructFieldRefs();
    ListObjectInspector midli = (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
    StructObjectInspector inner = (StructObjectInspector) midli.getListElementObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    ListObjectInspector li = (ListObjectInspector) readerInspector.getStructFieldRef("list").getFieldObjectInspector();
    MapObjectInspector ma = (MapObjectInspector) readerInspector.getStructFieldRef("map").getFieldObjectInspector();
    TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector.getStructFieldRef("ts").getFieldObjectInspector();
    HiveDecimalObjectInspector dco = (HiveDecimalObjectInspector) readerInspector.getStructFieldRef("decimal1").getFieldObjectInspector();
    StringObjectInspector mk = (StringObjectInspector) ma.getMapKeyObjectInspector();
    RecordReader rows = reader.rows();
    Object row = rows.next(null);
    assertNotNull(row);
    // check the contents of the first row
    assertEquals(false, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(1, by.get(readerInspector.getStructFieldData(row, fields.get(1))));
    assertEquals(1024, sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row, fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
    assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
    assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
    assertEquals(bytes(0, 1, 2, 3, 4), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
    List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(0), inFields.get(1))));
    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(1), inFields.get(1))));
    List<?> list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
    assertEquals(2, list.size());
    assertEquals(3, in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
    assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(0), inFields.get(1))));
    assertEquals(4, in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
    assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(1), inFields.get(1))));
    Map<?, ?> map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
    assertEquals(0, map.size());
    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"), tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(12))));
    assertEquals(HiveDecimal.create("12345678.6547456"), dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(13))));
    // check the contents of second row
    assertEquals(true, rows.hasNext());
    rows.seekToRow(7499);
    row = rows.next(null);
    assertEquals(true, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(100, by.get(readerInspector.getStructFieldData(row, fields.get(1))));
    assertEquals(2048, sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row, fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
    assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
    assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
    assertEquals(bytes(), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
    midRow = midli.getList(mid.getStructFieldData(readerInspector.getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(0), inFields.get(1))));
    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(1), inFields.get(1))));
    list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
    assertEquals(3, list.size());
    assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
    assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(0), inFields.get(1))));
    assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
    assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(1), inFields.get(1))));
    assertEquals(1234, in.get(inner.getStructFieldData(list.get(2), inFields.get(0))));
    assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(2), inFields.get(1))));
    map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
    assertEquals(2, map.size());
    boolean[] found = new boolean[2];
    for (Object key : map.keySet()) {
        String str = mk.getPrimitiveJavaObject(key);
        if (str.equals("chani")) {
            assertEquals(false, found[0]);
            assertEquals(5, in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
            assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(map.get(key), inFields.get(1))));
            found[0] = true;
        } else if (str.equals("mauddib")) {
            assertEquals(false, found[1]);
            assertEquals(1, in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
            assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(map.get(key), inFields.get(1))));
            found[1] = true;
        } else {
            throw new IllegalArgumentException("Unknown key " + str);
        }
    }
    assertEquals(true, found[0]);
    assertEquals(true, found[1]);
    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"), tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(12))));
    assertEquals(HiveDecimal.create("12345678.6547457"), dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(13))));
    // handle the close up
    assertEquals(false, rows.hasNext());
    rows.close();
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) Path(org.apache.hadoop.fs.Path) DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) StringColumnStatistics(org.apache.orc.StringColumnStatistics) DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) ColumnStatistics(org.apache.orc.ColumnStatistics) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) StripeInformation(org.apache.orc.StripeInformation) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Aggregations

ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)18 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)17 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)17 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)17 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)17 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)17 TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)17 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)16 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)16 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)15 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)15 DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)13 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)13 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)12 HiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector)12 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)11 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)10 Text (org.apache.hadoop.io.Text)10 Map (java.util.Map)9 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)9