Search in sources :

Example 11 with HiveDecimalObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector in project hive by apache.

the class TestOrcFile method testHiveDecimalIsNullReset.

/**
 * Writes 1000 null decimals followed by the values 1.00, 2.00 and 3.00, then reads the
 * file back and checks that the nulls come back as null and the three trailing values
 * come back as non-null decimals. Also verifies the file-level column statistics.
 */
@Test
public void testHiveDecimalIsNullReset() throws Exception {
    final int nullRowCount = 1000;
    final int totalRowCount = nullRowCount + 3;
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(DecimalStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
    // orc creates 1000 batch size to make memory check align with 5000 instead of 5120
    for (int i = 0; i < nullRowCount; i++) {
        writer.addRow(new DecimalStruct(null));
    }
    writer.addRow(new DecimalStruct(new HiveDecimalWritable("1.00")));
    writer.addRow(new DecimalStruct(new HiveDecimalWritable("2.00")));
    writer.addRow(new DecimalStruct(new HiveDecimalWritable("3.00")));
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector.getStructFieldRef("dec").getFieldObjectInspector();
    RecordReader rows = reader.rows();
    int idx = 0;
    try {
        while (rows.hasNext()) {
            Object row = rows.next(null);
            if (idx < nullRowCount) {
                assertEquals(null, doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
            }
            if (idx == nullRowCount) {
                assertEquals(new HiveDecimalWritable(1), doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
            }
            if (idx == nullRowCount + 1) {
                assertEquals(new HiveDecimalWritable(2), doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
            }
            // The third value lives at index 1002; the previous check against 10002 could
            // never match, so the last written decimal was silently left unverified.
            if (idx == nullRowCount + 2) {
                assertEquals(new HiveDecimalWritable(3), doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
            }
            idx++;
        }
    } finally {
        rows.close();
    }
    // make sure every written row was actually read back and checked
    assertEquals(totalRowCount, idx);
    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(1003, stats[0].getNumberOfValues());
    assertEquals(3, stats[1].getNumberOfValues());
    assertEquals(HiveDecimal.create(3), ((DecimalColumnStatistics) stats[1]).getMaximum());
    assertEquals(HiveDecimal.create(1), ((DecimalColumnStatistics) stats[1]).getMinimum());
    assertEquals(HiveDecimal.create(6), ((DecimalColumnStatistics) stats[1]).getSum());
    assertEquals(true, stats[1].hasNull());
}
Also used : DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) StringColumnStatistics(org.apache.orc.StringColumnStatistics) DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) ColumnStatistics(org.apache.orc.ColumnStatistics) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 
HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 12 with HiveDecimalObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector in project hive by apache.

the class TestOrcFile method testReadFormat_0_11.

/**
 * Reads the classpath fixture {@code orc-file-11-format.orc} and checks, in order:
 * that the stripes are laid out back to back, the file-level column statistics,
 * the reader's struct type name, the contents of the first row, and the contents
 * of the row at index 7499 (after which the reader must report no further rows).
 */
@Test
public void testReadFormat_0_11() throws Exception {
    Path oldFilePath = new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
    Reader reader = OrcFile.createReader(oldFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    int stripeCount = 0;
    int rowCount = 0;
    // end offset of the previously visited stripe; negative until the first stripe is seen
    long currentOffset = -1;
    for (StripeInformation stripe : reader.getStripes()) {
        stripeCount += 1;
        rowCount += stripe.getNumberOfRows();
        if (currentOffset < 0) {
            currentOffset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength() + stripe.getFooterLength();
        } else {
            // each later stripe must start exactly where the previous one ended
            assertEquals(currentOffset, stripe.getOffset());
            currentOffset += stripe.getIndexLength() + stripe.getDataLength() + stripe.getFooterLength();
        }
    }
    assertEquals(reader.getNumberOfRows(), rowCount);
    assertEquals(2, stripeCount);
    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(7500, stats[1].getNumberOfValues());
    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
    assertEquals("count: 7500 hasNull: true true: 3750", stats[1].toString());
    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
    assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
    assertEquals("count: 7500 hasNull: true min: 1024 max: 2048 sum: 11520000", stats[3].toString());
    assertEquals(Long.MAX_VALUE, ((IntegerColumnStatistics) stats[5]).getMaximum());
    assertEquals(Long.MAX_VALUE, ((IntegerColumnStatistics) stats[5]).getMinimum());
    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
    assertEquals("count: 7500 hasNull: true min: 9223372036854775807 max: 9223372036854775807", stats[5].toString());
    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
    assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
    assertEquals("count: 7500 hasNull: true min: -15.0 max: -5.0 sum: -75000.0", stats[7].toString());
    assertEquals("count: 7500 hasNull: true min: bye max: hi sum: 0", stats[9].toString());
    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint," + "int1:int,long1:bigint,float1:float,double1:double,bytes1:" + "binary,string1:string,middle:struct<list:array<struct<int1:int," + "string1:string>>>,list:array<struct<int1:int,string1:string>>," + "map:map<string,struct<int1:int,string1:string>>,ts:timestamp," + "decimal1:decimal(38,18)>", readerInspector.getTypeName());
    // look up one typed inspector per top-level field, plus the nested struct/list inspectors
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.getStructFieldRef("boolean1").getFieldObjectInspector();
    ByteObjectInspector by = (ByteObjectInspector) readerInspector.getStructFieldRef("byte1").getFieldObjectInspector();
    ShortObjectInspector sh = (ShortObjectInspector) readerInspector.getStructFieldRef("short1").getFieldObjectInspector();
    IntObjectInspector in = (IntObjectInspector) readerInspector.getStructFieldRef("int1").getFieldObjectInspector();
    LongObjectInspector lo = (LongObjectInspector) readerInspector.getStructFieldRef("long1").getFieldObjectInspector();
    FloatObjectInspector fl = (FloatObjectInspector) readerInspector.getStructFieldRef("float1").getFieldObjectInspector();
    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.getStructFieldRef("double1").getFieldObjectInspector();
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector) readerInspector.getStructFieldRef("string1").getFieldObjectInspector();
    StructObjectInspector mid = (StructObjectInspector) readerInspector.getStructFieldRef("middle").getFieldObjectInspector();
    List<? extends StructField> midFields = mid.getAllStructFieldRefs();
    ListObjectInspector midli = (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
    StructObjectInspector inner = (StructObjectInspector) midli.getListElementObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    ListObjectInspector li = (ListObjectInspector) readerInspector.getStructFieldRef("list").getFieldObjectInspector();
    MapObjectInspector ma = (MapObjectInspector) readerInspector.getStructFieldRef("map").getFieldObjectInspector();
    TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector.getStructFieldRef("ts").getFieldObjectInspector();
    HiveDecimalObjectInspector dco = (HiveDecimalObjectInspector) readerInspector.getStructFieldRef("decimal1").getFieldObjectInspector();
    StringObjectInspector mk = (StringObjectInspector) ma.getMapKeyObjectInspector();
    RecordReader rows = reader.rows();
    Object row = rows.next(null);
    assertNotNull(row);
    // check the contents of the first row
    assertEquals(false, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(1, by.get(readerInspector.getStructFieldData(row, fields.get(1))));
    assertEquals(1024, sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row, fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
    assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
    assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
    assertEquals(bytes(0, 1, 2, 3, 4), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
    List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(0), inFields.get(1))));
    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(1), inFields.get(1))));
    List<?> list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
    assertEquals(2, list.size());
    assertEquals(3, in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
    assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(0), inFields.get(1))));
    assertEquals(4, in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
    assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(1), inFields.get(1))));
    Map<?, ?> map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
    assertEquals(0, map.size());
    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"), tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(12))));
    assertEquals(HiveDecimal.create("12345678.6547456"), dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(13))));
    // seek to row index 7499 and check its contents; the hasNext() assertion at the end
    // of this method shows that no rows follow it
    assertEquals(true, rows.hasNext());
    rows.seekToRow(7499);
    row = rows.next(null);
    assertEquals(true, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(100, by.get(readerInspector.getStructFieldData(row, fields.get(1))));
    assertEquals(2048, sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row, fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
    assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
    assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
    assertEquals(bytes(), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
    midRow = midli.getList(mid.getStructFieldData(readerInspector.getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(0), inFields.get(1))));
    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(midRow.get(1), inFields.get(1))));
    list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
    assertEquals(3, list.size());
    assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
    assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(0), inFields.get(1))));
    assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
    assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(1), inFields.get(1))));
    assertEquals(1234, in.get(inner.getStructFieldData(list.get(2), inFields.get(0))));
    assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData(list.get(2), inFields.get(1))));
    map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
    assertEquals(2, map.size());
    // found[0] tracks key "chani", found[1] tracks key "mauddib"; each must appear exactly once
    boolean[] found = new boolean[2];
    for (Object key : map.keySet()) {
        String str = mk.getPrimitiveJavaObject(key);
        if (str.equals("chani")) {
            assertEquals(false, found[0]);
            assertEquals(5, in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
            assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(map.get(key), inFields.get(1))));
            found[0] = true;
        } else if (str.equals("mauddib")) {
            assertEquals(false, found[1]);
            assertEquals(1, in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
            assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(map.get(key), inFields.get(1))));
            found[1] = true;
        } else {
            throw new IllegalArgumentException("Unknown key " + str);
        }
    }
    assertEquals(true, found[0]);
    assertEquals(true, found[1]);
    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"), tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(12))));
    assertEquals(HiveDecimal.create("12345678.6547457"), dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(13))));
    // the reader must be exhausted now; then release it
    assertEquals(false, rows.hasNext());
    rows.close();
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) Path(org.apache.hadoop.fs.Path) DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) StringColumnStatistics(org.apache.orc.StringColumnStatistics) DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) ColumnStatistics(org.apache.orc.ColumnStatistics) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) StripeInformation(org.apache.orc.StripeInformation) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 13 with HiveDecimalObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector in project presto by prestodb.

the class SerDeUtils method serializePrimitive.

/**
 * Appends a single primitive value to {@code builder}, choosing the write call from the
 * primitive category reported by {@code inspector}.
 *
 * @param type the target type; used directly for STRING, VARCHAR, CHAR and DECIMAL values
 * @param builder the block builder to append to; must not be null
 * @param object the raw value to read through {@code inspector}; null appends a null entry
 * @param inspector the inspector used to extract the value from {@code object}
 * @throws RuntimeException if the inspector's primitive category is not handled here
 */
private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector) {
    requireNonNull(builder, "parent builder is null");
    if (object == null) {
        builder.appendNull();
        return;
    }
    switch (inspector.getPrimitiveCategory()) {
        case BOOLEAN: {
            boolean booleanValue = ((BooleanObjectInspector) inspector).get(object);
            BooleanType.BOOLEAN.writeBoolean(builder, booleanValue);
            break;
        }
        case BYTE: {
            long tinyintValue = ((ByteObjectInspector) inspector).get(object);
            TinyintType.TINYINT.writeLong(builder, tinyintValue);
            break;
        }
        case SHORT: {
            long smallintValue = ((ShortObjectInspector) inspector).get(object);
            SmallintType.SMALLINT.writeLong(builder, smallintValue);
            break;
        }
        case INT: {
            long integerValue = ((IntObjectInspector) inspector).get(object);
            IntegerType.INTEGER.writeLong(builder, integerValue);
            break;
        }
        case LONG: {
            long bigintValue = ((LongObjectInspector) inspector).get(object);
            BigintType.BIGINT.writeLong(builder, bigintValue);
            break;
        }
        case FLOAT: {
            // REAL values are written as the raw int bit pattern of the float
            float floatValue = ((FloatObjectInspector) inspector).get(object);
            RealType.REAL.writeLong(builder, floatToRawIntBits(floatValue));
            break;
        }
        case DOUBLE: {
            double doubleValue = ((DoubleObjectInspector) inspector).get(object);
            DoubleType.DOUBLE.writeDouble(builder, doubleValue);
            break;
        }
        case STRING: {
            String text = ((StringObjectInspector) inspector).getPrimitiveJavaObject(object);
            type.writeSlice(builder, Slices.utf8Slice(text));
            break;
        }
        case VARCHAR: {
            String text = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(object).getValue();
            type.writeSlice(builder, Slices.utf8Slice(text));
            break;
        }
        case CHAR: {
            CharType charType = (CharType) type;
            HiveChar hiveChar = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(object);
            String text = hiveChar.getValue();
            type.writeSlice(builder, trimSpacesAndTruncateToLength(Slices.utf8Slice(text), charType.getLength()));
            break;
        }
        case DATE: {
            DateObjectInspector dateInspector = (DateObjectInspector) inspector;
            DateType.DATE.writeLong(builder, formatDateAsLong(object, dateInspector));
            break;
        }
        case TIMESTAMP: {
            TimestampObjectInspector timestampInspector = (TimestampObjectInspector) inspector;
            TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, timestampInspector));
            break;
        }
        case BINARY: {
            byte[] bytes = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object);
            VARBINARY.writeSlice(builder, Slices.wrappedBuffer(bytes));
            break;
        }
        case DECIMAL: {
            DecimalType decimalType = (DecimalType) type;
            HiveDecimalWritable hiveDecimal = ((HiveDecimalObjectInspector) inspector).getPrimitiveWritableObject(object);
            // short decimals are written as a long, all others as a slice
            if (decimalType.isShort()) {
                decimalType.writeLong(builder, DecimalUtils.getShortDecimalValue(hiveDecimal, decimalType.getScale()));
            } else {
                decimalType.writeSlice(builder, DecimalUtils.getLongDecimalValue(hiveDecimal, decimalType.getScale()));
            }
            break;
        }
        default:
            throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
    }
}
Also used : DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) DecimalType(com.facebook.presto.spi.type.DecimalType) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) CharType(com.facebook.presto.spi.type.CharType) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)

Example 14 with HiveDecimalObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector in project hive by apache.

the class DataWritableWriter method createWriter.

/**
 * Builds the {@link DataWriter} that matches the given object inspector and Parquet type.
 * Primitive types are dispatched on the inspector's primitive category; group types are
 * dispatched on their original type (LIST, MAP, otherwise treated as a struct).
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 * @return A DataWriter used to call the Parquet API for the specific data type.
 * @throws IllegalArgumentException if the primitive category has no matching writer.
 */
private DataWriter createWriter(ObjectInspector inspector, Type type) {
    if (!type.isPrimitive()) {
        // Group types: the original type decides between list, map and plain struct.
        GroupType groupType = type.asGroupType();
        OriginalType originalType = type.getOriginalType();
        if (OriginalType.LIST.equals(originalType)) {
            checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
            return new ListDataWriter((ListObjectInspector) inspector, groupType);
        }
        if (OriginalType.MAP.equals(originalType)) {
            checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
            return new MapDataWriter((MapObjectInspector) inspector, groupType);
        }
        checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
        return new StructDataWriter((StructObjectInspector) inspector, groupType);
    }

    checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) inspector;
    switch (primitiveInspector.getPrimitiveCategory()) {
        case BOOLEAN:
            return new BooleanDataWriter((BooleanObjectInspector) inspector);
        case BYTE:
            return new ByteDataWriter((ByteObjectInspector) inspector);
        case SHORT:
            return new ShortDataWriter((ShortObjectInspector) inspector);
        case INT:
            return new IntDataWriter((IntObjectInspector) inspector);
        case LONG:
            return new LongDataWriter((LongObjectInspector) inspector);
        case FLOAT:
            return new FloatDataWriter((FloatObjectInspector) inspector);
        case DOUBLE:
            return new DoubleDataWriter((DoubleObjectInspector) inspector);
        case STRING:
            return new StringDataWriter((StringObjectInspector) inspector);
        case CHAR:
            return new CharDataWriter((HiveCharObjectInspector) inspector);
        case VARCHAR:
            return new VarcharDataWriter((HiveVarcharObjectInspector) inspector);
        case BINARY:
            return new BinaryDataWriter((BinaryObjectInspector) inspector);
        case TIMESTAMP:
            return new TimestampDataWriter((TimestampObjectInspector) inspector);
        case DECIMAL:
            return new DecimalDataWriter((HiveDecimalObjectInspector) inspector);
        case DATE:
            return new DateDataWriter((DateObjectInspector) inspector);
        default:
            throw new IllegalArgumentException("Unsupported primitive data type: " + primitiveInspector.getPrimitiveCategory());
    }
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) GroupType(org.apache.parquet.schema.GroupType) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 15 with HiveDecimalObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector in project hive by apache.

the class DruidSerDe method serialize.

/**
 * Converts a struct row into a {@link DruidWritable} backed by a column-name to value map.
 * Each of the configured columns is read through its field inspector; the field right
 * after the configured columns must be the timestamp granularity column, optionally
 * followed by the shard key column.
 *
 * @param o the row object to serialize
 * @param objectInspector inspector describing {@code o}; must be of category STRUCT
 * @return a DruidWritable wrapping the extracted values
 * @throws SerDeException if the inspector is not a struct or a column type is not handled
 */
@Override
public Writable serialize(Object o, ObjectInspector objectInspector) throws SerDeException {
    if (objectInspector.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objectInspector.getTypeName());
    }
    // Resolve the struct's field references and the row's field values
    StructObjectInspector structInspector = (StructObjectInspector) objectInspector;
    List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    List<Object> values = structInspector.getStructFieldsDataAsList(o);
    // Convert every configured column into its plain Java representation
    final Map<String, Object> druidRow = new HashMap<>();
    for (int i = 0; i < columns.length; i++) {
        final Object rawValue = values.get(i);
        if (rawValue == null) {
            // nulls are stored as-is
            druidRow.put(columns[i], null);
            continue;
        }
        final Object converted;
        switch (types[i].getPrimitiveCategory()) {
            case TIMESTAMP:
                converted = ((TimestampObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(rawValue).getTime();
                break;
            case TIMESTAMPLOCALTZ:
                converted = ((TimestampLocalTZObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(rawValue).getZonedDateTime().toInstant().toEpochMilli();
                break;
            case BYTE:
                converted = ((ByteObjectInspector) fields.get(i).getFieldObjectInspector()).get(rawValue);
                break;
            case SHORT:
                converted = ((ShortObjectInspector) fields.get(i).getFieldObjectInspector()).get(rawValue);
                break;
            case INT:
                converted = ((IntObjectInspector) fields.get(i).getFieldObjectInspector()).get(rawValue);
                break;
            case LONG:
                converted = ((LongObjectInspector) fields.get(i).getFieldObjectInspector()).get(rawValue);
                break;
            case FLOAT:
                converted = ((FloatObjectInspector) fields.get(i).getFieldObjectInspector()).get(rawValue);
                break;
            case DOUBLE:
                converted = ((DoubleObjectInspector) fields.get(i).getFieldObjectInspector()).get(rawValue);
                break;
            case DECIMAL:
                converted = ((HiveDecimalObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(rawValue).doubleValue();
                break;
            case CHAR:
                converted = ((HiveCharObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(rawValue).getValue();
                break;
            case VARCHAR:
                converted = ((HiveVarcharObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(rawValue).getValue();
                break;
            case STRING:
                converted = ((StringObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(rawValue);
                break;
            case BOOLEAN:
                converted = ((BooleanObjectInspector) fields.get(i).getFieldObjectInspector()).get(rawValue);
                break;
            default:
                throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory());
        }
        druidRow.put(columns[i], converted);
    }
    // The field right after the configured columns must carry the segment granularity timestamp.
    final int granularityFieldIndex = columns.length;
    assert values.size() > granularityFieldIndex;
    final StructField granularityField = fields.get(granularityFieldIndex);
    Preconditions.checkArgument(granularityField.getFieldName().equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME));
    final TimestampObjectInspector granularityInspector = (TimestampObjectInspector) fields.get(granularityFieldIndex).getFieldObjectInspector();
    druidRow.put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, granularityInspector.getPrimitiveJavaObject(values.get(granularityFieldIndex)).getTime());
    // When exactly two extra fields are present, the second one is the shard key (partition number).
    if (values.size() == columns.length + 2) {
        final int partitionNumPos = granularityFieldIndex + 1;
        final String shardFieldName = fields.get(partitionNumPos).getFieldName();
        Preconditions.checkArgument(shardFieldName.equals(Constants.DRUID_SHARD_KEY_COL_NAME), String.format("expecting to encounter %s but was %s", Constants.DRUID_SHARD_KEY_COL_NAME, fields.get(partitionNumPos).getFieldName()));
        final LongObjectInspector shardInspector = (LongObjectInspector) fields.get(partitionNumPos).getFieldObjectInspector();
        druidRow.put(Constants.DRUID_SHARD_KEY_COL_NAME, shardInspector.get(values.get(partitionNumPos)));
    }
    return new DruidWritable(druidRow);
}
Also used : HashMap(java.util.HashMap) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)15 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)12 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)11 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)11 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)11 TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)11 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)10 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)10 ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)10 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)10 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)9 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)9 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)9 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)8 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)8 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)8 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)7 DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)7 HiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector)7 Text (org.apache.hadoop.io.Text)7