Search in sources :

Example 16 with HiveVarcharObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.

the class DataWritableWriter method createWriter.

/**
 * Creates a writer for the specific object inspector. The returned writer will be used
 * to call Parquet API for the specific data type.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 * @return A ParquetWriter object used to call the Parquet API fo the specific data type.
 */
private DataWriter createWriter(ObjectInspector inspector, Type type) {
    if (type.isPrimitive()) {
        checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
        PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) inspector;
        switch(primitiveInspector.getPrimitiveCategory()) {
            case BOOLEAN:
                return new BooleanDataWriter((BooleanObjectInspector) inspector);
            case BYTE:
                return new ByteDataWriter((ByteObjectInspector) inspector);
            case SHORT:
                return new ShortDataWriter((ShortObjectInspector) inspector);
            case INT:
                return new IntDataWriter((IntObjectInspector) inspector);
            case LONG:
                return new LongDataWriter((LongObjectInspector) inspector);
            case FLOAT:
                return new FloatDataWriter((FloatObjectInspector) inspector);
            case DOUBLE:
                return new DoubleDataWriter((DoubleObjectInspector) inspector);
            case STRING:
                return new StringDataWriter((StringObjectInspector) inspector);
            case CHAR:
                return new CharDataWriter((HiveCharObjectInspector) inspector);
            case VARCHAR:
                return new VarcharDataWriter((HiveVarcharObjectInspector) inspector);
            case BINARY:
                return new BinaryDataWriter((BinaryObjectInspector) inspector);
            case TIMESTAMP:
                return new TimestampDataWriter((TimestampObjectInspector) inspector);
            case DECIMAL:
                return new DecimalDataWriter((HiveDecimalObjectInspector) inspector);
            case DATE:
                return new DateDataWriter((DateObjectInspector) inspector);
            default:
                throw new IllegalArgumentException("Unsupported primitive data type: " + primitiveInspector.getPrimitiveCategory());
        }
    } else {
        GroupType groupType = type.asGroupType();
        LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
        if (logicalType != null && logicalType instanceof ListLogicalTypeAnnotation) {
            checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
            return new ListDataWriter((ListObjectInspector) inspector, groupType);
        } else if (logicalType != null && logicalType instanceof MapLogicalTypeAnnotation) {
            checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
            return new MapDataWriter((MapObjectInspector) inspector, groupType);
        } else {
            checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
            return new StructDataWriter((StructObjectInspector) inspector, groupType);
        }
    }
}
Also used : MapLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) ListLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) MapLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation) GroupType(org.apache.parquet.schema.GroupType) ListLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 17 with HiveVarcharObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.

the class GenericUDFCastFormat method convert.

private Object convert(Object o) throws HiveException {
    Object input;
    switch(inputOI.getPrimitiveCategory()) {
        case STRING:
            input = ((StringObjectInspector) inputOI).getPrimitiveJavaObject(o);
            break;
        case CHAR:
            input = ((HiveCharObjectInspector) inputOI).getPrimitiveJavaObject(o).getStrippedValue();
            break;
        case VARCHAR:
            input = ((HiveVarcharObjectInspector) inputOI).getPrimitiveJavaObject(o).toString();
            break;
        case TIMESTAMP:
            input = ((TimestampObjectInspector) inputOI).getPrimitiveWritableObject(o).getTimestamp();
            break;
        case DATE:
            input = ((DateObjectInspector) inputOI).getPrimitiveWritableObject(o).get();
            break;
        default:
            throw new HiveException("Input type " + inputOI.getPrimitiveCategory() + " not valid");
    }
    // format here
    Object formattedOutput = null;
    if (inputOI.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.DATE || inputOI.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP) {
        if (inputOI.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.DATE) {
            try {
                formattedOutput = formatter.format((Date) input);
            } catch (IllegalArgumentException e) {
                return null;
            }
        } else {
            try {
                formattedOutput = formatter.format((Timestamp) input);
            } catch (IllegalArgumentException e) {
                return null;
            }
        }
        if (formattedOutput == null) {
            return null;
        }
    }
    // parse and create Writables
    switch(outputOI.getPrimitiveCategory()) {
        case STRING:
            return new Text((String) formattedOutput);
        case CHAR:
            return ((SettableHiveCharObjectInspector) outputOI).create(new HiveChar((String) formattedOutput, -1));
        case VARCHAR:
            return ((SettableHiveVarcharObjectInspector) outputOI).create(new HiveVarchar((String) formattedOutput, -1));
        case TIMESTAMP:
            try {
                Timestamp t = formatter.parseTimestamp((String) input);
                if (t == null) {
                    return null;
                }
                return ((SettableTimestampObjectInspector) outputOI).create(t);
            } catch (IllegalArgumentException e) {
                return null;
            }
        case DATE:
            try {
                Date d = formatter.parseDate((String) input);
                if (d == null) {
                    return null;
                }
                return ((SettableDateObjectInspector) outputOI).create(d);
            } catch (IllegalArgumentException e) {
                return null;
            }
        default:
            throw new HiveException("Output type " + outputOI.getPrimitiveCategory() + " not valid");
    }
}
Also used : DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) SettableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Date(org.apache.hadoop.hive.common.type.Date) SettableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) SettableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector) SettableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector) SettableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) SettableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector) SettableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector) SettableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector)

Example 18 with HiveVarcharObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.

the class DruidSerDe method serialize.

@Override
public Writable serialize(Object o, ObjectInspector objectInspector) throws SerDeException {
    if (objectInspector.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objectInspector.getTypeName());
    }
    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objectInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> values = soi.getStructFieldsDataAsList(o);
    // We deserialize the result
    final Map<String, Object> value = new HashMap<>();
    for (int i = 0; i < columns.length; i++) {
        if (values.get(i) == null) {
            // null, we just add it
            value.put(columns[i], null);
            continue;
        }
        final Object res;
        switch(types[i].getPrimitiveCategory()) {
            case TIMESTAMP:
                res = ((TimestampObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(i)).toEpochMilli();
                break;
            case TIMESTAMPLOCALTZ:
                res = ((TimestampLocalTZObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(i)).getZonedDateTime().toInstant().toEpochMilli();
                break;
            case BYTE:
                res = ((ByteObjectInspector) fields.get(i).getFieldObjectInspector()).get(values.get(i));
                break;
            case SHORT:
                res = ((ShortObjectInspector) fields.get(i).getFieldObjectInspector()).get(values.get(i));
                break;
            case INT:
                res = ((IntObjectInspector) fields.get(i).getFieldObjectInspector()).get(values.get(i));
                break;
            case LONG:
                res = ((LongObjectInspector) fields.get(i).getFieldObjectInspector()).get(values.get(i));
                break;
            case FLOAT:
                res = ((FloatObjectInspector) fields.get(i).getFieldObjectInspector()).get(values.get(i));
                break;
            case DOUBLE:
                res = ((DoubleObjectInspector) fields.get(i).getFieldObjectInspector()).get(values.get(i));
                break;
            case CHAR:
                res = ((HiveCharObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(i)).getValue();
                break;
            case VARCHAR:
                res = ((HiveVarcharObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(i)).getValue();
                break;
            case STRING:
                res = ((StringObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(i));
                break;
            case BOOLEAN:
                res = ((BooleanObjectInspector) fields.get(i).getFieldObjectInspector()).get(values.get(i)) ? 1L : 0L;
                break;
            default:
                throw new SerDeException("Unsupported type: " + types[i].getPrimitiveCategory());
        }
        value.put(columns[i], res);
    }
    // Extract the partitions keys segments granularity and partition key if any
    // First Segment Granularity has to be here.
    final int granularityFieldIndex = columns.length;
    assert values.size() > granularityFieldIndex;
    Preconditions.checkArgument(fields.get(granularityFieldIndex).getFieldName().equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME));
    Timestamp timestamp = ((TimestampObjectInspector) fields.get(granularityFieldIndex).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(granularityFieldIndex));
    Preconditions.checkNotNull(timestamp, "Timestamp column cannot have null value");
    value.put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, timestamp.toEpochMilli());
    if (values.size() == columns.length + 2) {
        // Then partition number if any.
        final int partitionNumPos = granularityFieldIndex + 1;
        Preconditions.checkArgument(fields.get(partitionNumPos).getFieldName().equals(Constants.DRUID_SHARD_KEY_COL_NAME), String.format("expecting to encounter %s but was %s", Constants.DRUID_SHARD_KEY_COL_NAME, fields.get(partitionNumPos).getFieldName()));
        value.put(Constants.DRUID_SHARD_KEY_COL_NAME, ((LongObjectInspector) fields.get(partitionNumPos).getFieldObjectInspector()).get(values.get(partitionNumPos)));
    }
    return new DruidWritable(value);
}
Also used : HashMap(java.util.HashMap) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 19 with HiveVarcharObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.

the class LazyBinarySerDe method serialize.

/**
 * A recursive function that serialize an object to a byte buffer based on its
 * object inspector.
 *
 * @param byteStream
 *          the byte stream storing the serialization data
 * @param obj
 *          the object to serialize
 * @param objInspector
 *          the object inspector
 * @param skipLengthPrefix a boolean indicating whether length prefix is
 *          needed for list/map/struct
 * @param warnedOnceNullMapKey a boolean indicating whether a warning
 *          has been issued once already when encountering null map keys
 */
public static void serialize(RandomAccessOutput byteStream, Object obj, ObjectInspector objInspector, boolean skipLengthPrefix, BooleanRef warnedOnceNullMapKey) throws SerDeException {
    // do nothing for null object
    if (null == obj) {
        return;
    }
    switch(objInspector.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
                switch(poi.getPrimitiveCategory()) {
                    case VOID:
                        {
                            return;
                        }
                    case BOOLEAN:
                        {
                            boolean v = ((BooleanObjectInspector) poi).get(obj);
                            byteStream.write((byte) (v ? 1 : 0));
                            return;
                        }
                    case BYTE:
                        {
                            ByteObjectInspector boi = (ByteObjectInspector) poi;
                            byte v = boi.get(obj);
                            byteStream.write(v);
                            return;
                        }
                    case SHORT:
                        {
                            ShortObjectInspector spoi = (ShortObjectInspector) poi;
                            short v = spoi.get(obj);
                            byteStream.write((byte) (v >> 8));
                            byteStream.write((byte) (v));
                            return;
                        }
                    case INT:
                        {
                            IntObjectInspector ioi = (IntObjectInspector) poi;
                            int v = ioi.get(obj);
                            LazyBinaryUtils.writeVInt(byteStream, v);
                            return;
                        }
                    case LONG:
                        {
                            LongObjectInspector loi = (LongObjectInspector) poi;
                            long v = loi.get(obj);
                            LazyBinaryUtils.writeVLong(byteStream, v);
                            return;
                        }
                    case FLOAT:
                        {
                            FloatObjectInspector foi = (FloatObjectInspector) poi;
                            int v = Float.floatToIntBits(foi.get(obj));
                            byteStream.write((byte) (v >> 24));
                            byteStream.write((byte) (v >> 16));
                            byteStream.write((byte) (v >> 8));
                            byteStream.write((byte) (v));
                            return;
                        }
                    case DOUBLE:
                        {
                            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                            LazyBinaryUtils.writeDouble(byteStream, doi.get(obj));
                            return;
                        }
                    case STRING:
                        {
                            StringObjectInspector soi = (StringObjectInspector) poi;
                            Text t = soi.getPrimitiveWritableObject(obj);
                            serializeText(byteStream, t, skipLengthPrefix);
                            return;
                        }
                    case CHAR:
                        {
                            HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
                            Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
                            serializeText(byteStream, t, skipLengthPrefix);
                            return;
                        }
                    case VARCHAR:
                        {
                            HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
                            Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
                            serializeText(byteStream, t, skipLengthPrefix);
                            return;
                        }
                    case BINARY:
                        {
                            BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
                            BytesWritable bw = baoi.getPrimitiveWritableObject(obj);
                            int length = bw.getLength();
                            if (!skipLengthPrefix) {
                                LazyBinaryUtils.writeVInt(byteStream, length);
                            } else {
                                if (length == 0) {
                                    throw new RuntimeException("LazyBinaryColumnarSerde cannot serialize a non-null zero " + "length binary field. Consider using either LazyBinarySerde or ColumnarSerde.");
                                }
                            }
                            byteStream.write(bw.getBytes(), 0, length);
                            return;
                        }
                    case DATE:
                        {
                            DateWritableV2 d = ((DateObjectInspector) poi).getPrimitiveWritableObject(obj);
                            writeDateToByteStream(byteStream, d);
                            return;
                        }
                    case TIMESTAMP:
                        {
                            TimestampObjectInspector toi = (TimestampObjectInspector) poi;
                            TimestampWritableV2 t = toi.getPrimitiveWritableObject(obj);
                            t.writeToByteStream(byteStream);
                            return;
                        }
                    case TIMESTAMPLOCALTZ:
                        {
                            TimestampLocalTZWritable t = ((TimestampLocalTZObjectInspector) poi).getPrimitiveWritableObject(obj);
                            t.writeToByteStream(byteStream);
                            return;
                        }
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonthWritable intervalYearMonth = ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(obj);
                            intervalYearMonth.writeToByteStream(byteStream);
                            return;
                        }
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTimeWritable intervalDayTime = ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(obj);
                            intervalDayTime.writeToByteStream(byteStream);
                            return;
                        }
                    case DECIMAL:
                        {
                            HiveDecimalObjectInspector bdoi = (HiveDecimalObjectInspector) poi;
                            HiveDecimalWritable t = bdoi.getPrimitiveWritableObject(obj);
                            if (t == null) {
                                return;
                            }
                            writeToByteStream(byteStream, t);
                            return;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
            {
                ListObjectInspector loi = (ListObjectInspector) objInspector;
                ObjectInspector eoi = loi.getListElementObjectInspector();
                int byteSizeStart = 0;
                int listStart = 0;
                if (!skipLengthPrefix) {
                    // 1/ reserve spaces for the byte size of the list
                    // which is a integer and takes four bytes
                    byteSizeStart = byteStream.getLength();
                    byteStream.reserve(4);
                    listStart = byteStream.getLength();
                }
                // 2/ write the size of the list as a VInt
                int size = loi.getListLength(obj);
                LazyBinaryUtils.writeVInt(byteStream, size);
                // 3/ write the null bytes
                byte nullByte = 0;
                for (int eid = 0; eid < size; eid++) {
                    // set the bit to 1 if an element is not null
                    if (null != loi.getListElement(obj, eid)) {
                        nullByte |= 1 << (eid % 8);
                    }
                    // if this is the last element
                    if (7 == eid % 8 || eid == size - 1) {
                        byteStream.write(nullByte);
                        nullByte = 0;
                    }
                }
                // 4/ write element by element from the list
                for (int eid = 0; eid < size; eid++) {
                    serialize(byteStream, loi.getListElement(obj, eid), eoi, false, warnedOnceNullMapKey);
                }
                if (!skipLengthPrefix) {
                    // 5/ update the list byte size
                    int listEnd = byteStream.getLength();
                    int listSize = listEnd - listStart;
                    writeSizeAtOffset(byteStream, byteSizeStart, listSize);
                }
                return;
            }
        case MAP:
            {
                MapObjectInspector moi = (MapObjectInspector) objInspector;
                ObjectInspector koi = moi.getMapKeyObjectInspector();
                ObjectInspector voi = moi.getMapValueObjectInspector();
                Map<?, ?> map = moi.getMap(obj);
                int byteSizeStart = 0;
                int mapStart = 0;
                if (!skipLengthPrefix) {
                    // 1/ reserve spaces for the byte size of the map
                    // which is a integer and takes four bytes
                    byteSizeStart = byteStream.getLength();
                    byteStream.reserve(4);
                    mapStart = byteStream.getLength();
                }
                // 2/ write the size of the map which is a VInt
                int size = map.size();
                LazyBinaryUtils.writeVInt(byteStream, size);
                // 3/ write the null bytes
                int b = 0;
                byte nullByte = 0;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    // set the bit to 1 if a key is not null
                    if (null != entry.getKey()) {
                        nullByte |= 1 << (b % 8);
                    } else if (warnedOnceNullMapKey != null) {
                        if (!warnedOnceNullMapKey.value) {
                            LOG.warn("Null map key encountered! Ignoring similar problems.");
                        }
                        warnedOnceNullMapKey.value = true;
                    }
                    b++;
                    // set the bit to 1 if a value is not null
                    if (null != entry.getValue()) {
                        nullByte |= 1 << (b % 8);
                    }
                    b++;
                    // or if this is the last key-value pair
                    if (0 == b % 8 || b == size * 2) {
                        byteStream.write(nullByte);
                        nullByte = 0;
                    }
                }
                // 4/ write key-value pairs one by one
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    serialize(byteStream, entry.getKey(), koi, false, warnedOnceNullMapKey);
                    serialize(byteStream, entry.getValue(), voi, false, warnedOnceNullMapKey);
                }
                if (!skipLengthPrefix) {
                    // 5/ update the byte size of the map
                    int mapEnd = byteStream.getLength();
                    int mapSize = mapEnd - mapStart;
                    writeSizeAtOffset(byteStream, byteSizeStart, mapSize);
                }
                return;
            }
        case STRUCT:
        case UNION:
            {
                int byteSizeStart = 0;
                int typeStart = 0;
                if (!skipLengthPrefix) {
                    // 1/ reserve spaces for the byte size of the struct
                    // which is a integer and takes four bytes
                    byteSizeStart = byteStream.getLength();
                    byteStream.reserve(4);
                    typeStart = byteStream.getLength();
                }
                if (ObjectInspector.Category.STRUCT.equals(objInspector.getCategory())) {
                    // 2/ serialize the struct
                    serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
                } else {
                    // 2/ serialize the union
                    serializeUnion(byteStream, obj, (UnionObjectInspector) objInspector, warnedOnceNullMapKey);
                }
                if (!skipLengthPrefix) {
                    // 3/ update the byte size of the struct
                    int typeEnd = byteStream.getLength();
                    int typeSize = typeEnd - typeStart;
                    writeSizeAtOffset(byteStream, byteSizeStart, typeSize);
                }
                return;
            }
        default:
            {
                throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
            }
    }
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 20 with HiveVarcharObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.

the class ObjectInspectorUtils method hashCode.

public static int hashCode(Object o, ObjectInspector objIns) {
    if (o == null) {
        return 0;
    }
    switch(objIns.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objIns);
                switch(poi.getPrimitiveCategory()) {
                    case VOID:
                        return 0;
                    case BOOLEAN:
                        return ((BooleanObjectInspector) poi).get(o) ? 1 : 0;
                    case BYTE:
                        return ((ByteObjectInspector) poi).get(o);
                    case SHORT:
                        return ((ShortObjectInspector) poi).get(o);
                    case INT:
                        return ((IntObjectInspector) poi).get(o);
                    case LONG:
                        {
                            long a = ((LongObjectInspector) poi).get(o);
                            return (int) ((a >>> 32) ^ a);
                        }
                    case FLOAT:
                        return Float.floatToIntBits(((FloatObjectInspector) poi).get(o));
                    case DOUBLE:
                        {
                            // This hash function returns the same result as Double.hashCode()
                            // while DoubleWritable.hashCode returns a different result.
                            long a = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o));
                            return (int) ((a >>> 32) ^ a);
                        }
                    case STRING:
                        {
                            // This hash function returns the same result as String.hashCode() when
                            // all characters are ASCII, while Text.hashCode() always returns a
                            // different result.
                            Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o);
                            int r = 0;
                            for (int i = 0; i < t.getLength(); i++) {
                                r = r * 31 + t.getBytes()[i];
                            }
                            return r;
                        }
                    case CHAR:
                        return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
                    case VARCHAR:
                        return ((HiveVarcharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
                    case BINARY:
                        return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
                    case DATE:
                        return ((DateObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
                    case TIMESTAMP:
                        // Use old timestamp writable hash code for backwards compatibility
                        TimestampWritable ts = new TimestampWritable(java.sql.Timestamp.valueOf(((TimestampObjectInspector) poi).getPrimitiveWritableObject(o).toString()));
                        return ts.hashCode();
                    case TIMESTAMPLOCALTZ:
                        TimestampLocalTZWritable tstz = ((TimestampLocalTZObjectInspector) poi).getPrimitiveWritableObject(o);
                        return tstz.hashCode();
                    case INTERVAL_YEAR_MONTH:
                        HiveIntervalYearMonthWritable intervalYearMonth = ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(o);
                        return intervalYearMonth.hashCode();
                    case INTERVAL_DAY_TIME:
                        HiveIntervalDayTimeWritable intervalDayTime = ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(o);
                        return intervalDayTime.hashCode();
                    case DECIMAL:
                        // compatible hash code.
                        return ((HiveDecimalObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
                    default:
                        {
                            throw new RuntimeException("Unknown type: " + poi.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
            {
                int r = 0;
                ListObjectInspector listOI = (ListObjectInspector) objIns;
                ObjectInspector elemOI = listOI.getListElementObjectInspector();
                for (int ii = 0; ii < listOI.getListLength(o); ++ii) {
                    r = 31 * r + hashCode(listOI.getListElement(o, ii), elemOI);
                }
                return r;
            }
        case MAP:
            {
                int r = 0;
                MapObjectInspector mapOI = (MapObjectInspector) objIns;
                ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
                ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
                Map<?, ?> map = mapOI.getMap(o);
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    r += hashCode(entry.getKey(), keyOI) ^ hashCode(entry.getValue(), valueOI);
                }
                return r;
            }
        case STRUCT:
            int r = 0;
            StructObjectInspector structOI = (StructObjectInspector) objIns;
            List<? extends StructField> fields = structOI.getAllStructFieldRefs();
            for (StructField field : fields) {
                r = 31 * r + hashCode(structOI.getStructFieldData(o, field), field.getFieldObjectInspector());
            }
            return r;
        case UNION:
            UnionObjectInspector uOI = (UnionObjectInspector) objIns;
            byte tag = uOI.getTag(o);
            return hashCode(uOI.getField(o), uOI.getObjectInspectors().get(tag));
        default:
            throw new RuntimeException("Unknown type: " + objIns.getTypeName());
    }
}
Also used : SettableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) SettableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector) SettableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) SettableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector) SettableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) SettableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) SettableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) SettableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector) SettableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector) SettableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) SettableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector) SettableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) SettableTimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampLocalTZObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) SettableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) SettableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector) SettableHiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalDayTimeObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) SettableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector) AbstractPrimitiveWritableObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector) SettableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) SettableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector) SettableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) SettableHiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalYearMonthObjectInspector) SettableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) SettableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector) Text(org.apache.hadoop.io.Text) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) SettableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) SettableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)26 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)25 TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)25 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)24 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)23 HiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector)21 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)20 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)19 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)17 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)16 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)16 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)16 ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)16 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)15 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)15 Map (java.util.Map)14 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)14 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)13 List (java.util.List)12 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)12