Search in sources :

Example 21 with UnionTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo in project hive by apache.

the class VectorSerializeRow method serializeUnionWrite.

private void serializeUnionWrite(UnionColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
    UnionTypeInfo typeInfo = (UnionTypeInfo) field.typeInfo;
    UnionObjectInspector objectInspector = (UnionObjectInspector) field.objectInspector;
    final byte tag = (byte) colVector.tags[adjustedBatchIndex];
    final ColumnVector fieldColumnVector = colVector.fields[tag];
    final Field childField = field.children[tag];
    serializeWrite.beginUnion(tag);
    serializeWrite(fieldColumnVector, childField, adjustedBatchIndex);
    serializeWrite.finishUnion();
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 22 with UnionTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo in project nifi by apache.

the class NiFiOrcUtils method convertToORCObject.

public static Object convertToORCObject(TypeInfo typeInfo, Object o) {
    if (o != null) {
        if (typeInfo instanceof UnionTypeInfo) {
            OrcUnion union = new OrcUnion();
            // Need to find which of the union types correspond to the primitive object
            TypeInfo objectTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(ObjectInspectorFactory.getReflectionObjectInspector(o.getClass(), ObjectInspectorFactory.ObjectInspectorOptions.JAVA));
            List<TypeInfo> unionTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos();
            int index = 0;
            while (index < unionTypeInfos.size() && !unionTypeInfos.get(index).equals(objectTypeInfo)) {
                index++;
            }
            if (index < unionTypeInfos.size()) {
                union.set((byte) index, convertToORCObject(objectTypeInfo, o));
            } else {
                throw new IllegalArgumentException("Object Type for class " + o.getClass().getName() + " not in Union declaration");
            }
            return union;
        }
        if (o instanceof Integer) {
            return new IntWritable((int) o);
        }
        if (o instanceof Boolean) {
            return new BooleanWritable((boolean) o);
        }
        if (o instanceof Long) {
            return new LongWritable((long) o);
        }
        if (o instanceof Float) {
            return new FloatWritable((float) o);
        }
        if (o instanceof Double) {
            return new DoubleWritable((double) o);
        }
        if (o instanceof String || o instanceof Utf8 || o instanceof GenericData.EnumSymbol) {
            return new Text(o.toString());
        }
        if (o instanceof ByteBuffer) {
            return new BytesWritable(((ByteBuffer) o).array());
        }
        if (o instanceof int[]) {
            int[] intArray = (int[]) o;
            return Arrays.stream(intArray).mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("int"), element)).collect(Collectors.toList());
        }
        if (o instanceof long[]) {
            long[] longArray = (long[]) o;
            return Arrays.stream(longArray).mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("bigint"), element)).collect(Collectors.toList());
        }
        if (o instanceof float[]) {
            float[] floatArray = (float[]) o;
            return IntStream.range(0, floatArray.length).mapToDouble(i -> floatArray[i]).mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("float"), (float) element)).collect(Collectors.toList());
        }
        if (o instanceof double[]) {
            double[] doubleArray = (double[]) o;
            return Arrays.stream(doubleArray).mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("double"), element)).collect(Collectors.toList());
        }
        if (o instanceof boolean[]) {
            boolean[] booleanArray = (boolean[]) o;
            return IntStream.range(0, booleanArray.length).map(i -> booleanArray[i] ? 1 : 0).mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("boolean"), element == 1)).collect(Collectors.toList());
        }
        if (o instanceof GenericData.Array) {
            GenericData.Array array = ((GenericData.Array) o);
            // The type information in this case is interpreted as a List
            TypeInfo listTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
            return array.stream().map((element) -> convertToORCObject(listTypeInfo, element)).collect(Collectors.toList());
        }
        if (o instanceof List) {
            return o;
        }
        if (o instanceof Map) {
            Map map = new HashMap();
            TypeInfo keyInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
            TypeInfo valueInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
            // Unions are not allowed as key/value types, so if we convert the key and value objects,
            // they should return Writable objects
            ((Map) o).forEach((key, value) -> {
                Object keyObject = convertToORCObject(keyInfo, key);
                Object valueObject = convertToORCObject(valueInfo, value);
                if (keyObject == null) {
                    throw new IllegalArgumentException("Maps' key cannot be null");
                }
                map.put(keyObject, valueObject);
            });
            return map;
        }
        if (o instanceof GenericData.Record) {
            GenericData.Record record = (GenericData.Record) o;
            TypeInfo recordSchema = NiFiOrcUtils.getOrcField(record.getSchema());
            List<Schema.Field> recordFields = record.getSchema().getFields();
            if (recordFields != null) {
                Object[] fieldObjects = new Object[recordFields.size()];
                for (int i = 0; i < recordFields.size(); i++) {
                    Schema.Field field = recordFields.get(i);
                    Schema fieldSchema = field.schema();
                    Object fieldObject = record.get(field.name());
                    fieldObjects[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), fieldObject);
                }
                return NiFiOrcUtils.createOrcStruct(recordSchema, fieldObjects);
            }
        }
        throw new IllegalArgumentException("Error converting object of type " + o.getClass().getName() + " to ORC type " + typeInfo.getTypeName());
    } else {
        return null;
    }
}
Also used : IntStream(java.util.stream.IntStream) TypeInfoUtils(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE) Arrays(java.util.Arrays) Text(org.apache.hadoop.io.Text) HashMap(java.util.HashMap) StringUtils(org.apache.commons.lang3.StringUtils) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteBuffer(java.nio.ByteBuffer) GenericData(org.apache.avro.generic.GenericData) ArrayList(java.util.ArrayList) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) HIVE_ORC_DEFAULT_BLOCK_SIZE(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE) BytesWritable(org.apache.hadoop.io.BytesWritable) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) IntWritable(org.apache.hadoop.io.IntWritable) OutputStream(java.io.OutputStream) Utf8(org.apache.avro.util.Utf8) HIVE_ORC_DEFAULT_BLOCK_PADDING(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING) Schema(org.apache.avro.Schema) HIVE_ORC_WRITE_FORMAT(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) HiveConf(org.apache.hadoop.hive.conf.HiveConf) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) List(java.util.List) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) BloomFilterIO(org.apache.hadoop.hive.ql.io.filters.BloomFilterIO) FloatWritable(org.apache.hadoop.io.FloatWritable) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) DoubleWritable(org.apache.hadoop.io.DoubleWritable) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList) List(java.util.List) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) GenericData(org.apache.avro.generic.GenericData) ByteBuffer(java.nio.ByteBuffer) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Utf8(org.apache.avro.util.Utf8) HashMap(java.util.HashMap) Map(java.util.Map) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 23 with UnionTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo in project hive by apache.

the class VectorVerifyFast method serializeWrite.

public static void serializeWrite(SerializeWrite serializeWrite, TypeInfo typeInfo, Object object) throws IOException {
    if (object == null) {
        serializeWrite.writeNull();
        return;
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                switch(primitiveTypeInfo.getPrimitiveCategory()) {
                    case BOOLEAN:
                        {
                            boolean value = ((BooleanWritable) object).get();
                            serializeWrite.writeBoolean(value);
                        }
                        break;
                    case BYTE:
                        {
                            byte value = ((ByteWritable) object).get();
                            serializeWrite.writeByte(value);
                        }
                        break;
                    case SHORT:
                        {
                            short value = ((ShortWritable) object).get();
                            serializeWrite.writeShort(value);
                        }
                        break;
                    case INT:
                        {
                            int value = ((IntWritable) object).get();
                            serializeWrite.writeInt(value);
                        }
                        break;
                    case LONG:
                        {
                            long value = ((LongWritable) object).get();
                            serializeWrite.writeLong(value);
                        }
                        break;
                    case FLOAT:
                        {
                            float value = ((FloatWritable) object).get();
                            serializeWrite.writeFloat(value);
                        }
                        break;
                    case DOUBLE:
                        {
                            double value = ((DoubleWritable) object).get();
                            serializeWrite.writeDouble(value);
                        }
                        break;
                    case STRING:
                        {
                            Text value = (Text) object;
                            byte[] bytes = value.getBytes();
                            int byteLength = value.getLength();
                            serializeWrite.writeString(bytes, 0, byteLength);
                        }
                        break;
                    case CHAR:
                        {
                            HiveChar value = ((HiveCharWritable) object).getHiveChar();
                            serializeWrite.writeHiveChar(value);
                        }
                        break;
                    case VARCHAR:
                        {
                            HiveVarchar value = ((HiveVarcharWritable) object).getHiveVarchar();
                            serializeWrite.writeHiveVarchar(value);
                        }
                        break;
                    case DECIMAL:
                        {
                            HiveDecimal value = ((HiveDecimalWritable) object).getHiveDecimal();
                            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                            serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
                        }
                        break;
                    case DATE:
                        {
                            Date value = ((DateWritableV2) object).get();
                            serializeWrite.writeDate(value);
                        }
                        break;
                    case TIMESTAMP:
                        {
                            Timestamp value = ((TimestampWritableV2) object).getTimestamp();
                            serializeWrite.writeTimestamp(value);
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
                            serializeWrite.writeHiveIntervalYearMonth(value);
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
                            serializeWrite.writeHiveIntervalDayTime(value);
                        }
                        break;
                    case BINARY:
                        {
                            BytesWritable byteWritable = (BytesWritable) object;
                            byte[] binaryBytes = byteWritable.getBytes();
                            int length = byteWritable.getLength();
                            serializeWrite.writeBinary(binaryBytes, 0, length);
                        }
                        break;
                    default:
                        throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
                }
            }
            break;
        case LIST:
            {
                ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
                TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
                ArrayList<Object> elements = (ArrayList<Object>) object;
                serializeWrite.beginList(elements);
                boolean isFirst = true;
                for (Object elementObject : elements) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateList();
                    }
                    if (elementObject == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, elementTypeInfo, elementObject);
                    }
                }
                serializeWrite.finishList();
            }
            break;
        case MAP:
            {
                MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
                TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
                TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
                HashMap<Object, Object> hashMap = (HashMap<Object, Object>) object;
                serializeWrite.beginMap(hashMap);
                boolean isFirst = true;
                for (Map.Entry<Object, Object> entry : hashMap.entrySet()) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateKeyValuePair();
                    }
                    if (entry.getKey() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, keyTypeInfo, entry.getKey());
                    }
                    serializeWrite.separateKey();
                    if (entry.getValue() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, valueTypeInfo, entry.getValue());
                    }
                }
                serializeWrite.finishMap();
            }
            break;
        case STRUCT:
            {
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                List<Object> fieldValues = (List<Object>) object;
                final int size = fieldValues.size();
                serializeWrite.beginStruct(fieldValues);
                boolean isFirst = true;
                for (int i = 0; i < size; i++) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateStruct();
                    }
                    serializeWrite(serializeWrite, fieldTypeInfos.get(i), fieldValues.get(i));
                }
                serializeWrite.finishStruct();
            }
            break;
        case UNION:
            {
                UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                final int size = fieldTypeInfos.size();
                StandardUnionObjectInspector.StandardUnion standardUnion = (StandardUnionObjectInspector.StandardUnion) object;
                byte tag = standardUnion.getTag();
                serializeWrite.beginUnion(tag);
                serializeWrite(serializeWrite, fieldTypeInfos.get(tag), standardUnion.getObject());
                serializeWrite.finishUnion();
            }
            break;
        default:
            throw new Error("Unknown category " + typeInfo.getCategory().name());
    }
}
Also used : StandardUnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector) HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) ArrayList(java.util.ArrayList) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ArrayList(java.util.ArrayList) List(java.util.List) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) Text(org.apache.hadoop.io.Text) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Date(org.apache.hadoop.hive.common.type.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 24 with UnionTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo in project hive by apache.

the class BinarySortableSerDe method deserialize.

static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, byte nullMarker, byte notNullMarker, Object reuse) throws IOException {
    // Is this field a null?
    byte isNull = buffer.read(invert);
    if (isNull == nullMarker) {
        return null;
    }
    assert (isNull == notNullMarker);
    switch(type.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
                switch(ptype.getPrimitiveCategory()) {
                    case VOID:
                        {
                            return null;
                        }
                    case BOOLEAN:
                        {
                            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
                            byte b = buffer.read(invert);
                            assert (b == 1 || b == 2);
                            r.set(b == 2);
                            return r;
                        }
                    case BYTE:
                        {
                            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
                            r.set((byte) (buffer.read(invert) ^ 0x80));
                            return r;
                        }
                    case SHORT:
                        {
                            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
                            int v = buffer.read(invert) ^ 0x80;
                            v = (v << 8) + (buffer.read(invert) & 0xff);
                            r.set((short) v);
                            return r;
                        }
                    case INT:
                        {
                            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
                            r.set(deserializeInt(buffer, invert));
                            return r;
                        }
                    case LONG:
                        {
                            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
                            r.set(deserializeLong(buffer, invert));
                            return r;
                        }
                    case FLOAT:
                        {
                            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
                            int v = 0;
                            for (int i = 0; i < 4; i++) {
                                v = (v << 8) + (buffer.read(invert) & 0xff);
                            }
                            if ((v & (1 << 31)) == 0) {
                                // negative number, flip all bits
                                v = ~v;
                            } else {
                                // positive number, flip the first bit
                                v = v ^ (1 << 31);
                            }
                            r.set(Float.intBitsToFloat(v));
                            return r;
                        }
                    case DOUBLE:
                        {
                            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
                            long v = 0;
                            for (int i = 0; i < 8; i++) {
                                v = (v << 8) + (buffer.read(invert) & 0xff);
                            }
                            if ((v & (1L << 63)) == 0) {
                                // negative number, flip all bits
                                v = ~v;
                            } else {
                                // positive number, flip the first bit
                                v = v ^ (1L << 63);
                            }
                            r.set(Double.longBitsToDouble(v));
                            return r;
                        }
                    case STRING:
                        {
                            Text r = reuse == null ? new Text() : (Text) reuse;
                            return deserializeText(buffer, invert, r);
                        }
                    case CHAR:
                        {
                            HiveCharWritable r = reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse;
                            // Use internal text member to read value
                            deserializeText(buffer, invert, r.getTextValue());
                            r.enforceMaxLength(getCharacterMaxLength(type));
                            return r;
                        }
                    case VARCHAR:
                        {
                            HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
                            // Use HiveVarchar's internal Text member to read the value.
                            deserializeText(buffer, invert, r.getTextValue());
                            // If we cache helper data for deserialization we could avoid having
                            // to call getVarcharMaxLength() on every deserialize call.
                            r.enforceMaxLength(getCharacterMaxLength(type));
                            return r;
                        }
                    case BINARY:
                        {
                            BytesWritable bw = new BytesWritable();
                            // Get the actual length first
                            int start = buffer.tell();
                            int length = 0;
                            do {
                                byte b = buffer.read(invert);
                                if (b == 0) {
                                    // end of string
                                    break;
                                }
                                if (b == 1) {
                                    // the last char is an escape char. read the actual char
                                    buffer.read(invert);
                                }
                                length++;
                            } while (true);
                            if (length == buffer.tell() - start) {
                                // No escaping happened, so we are already done.
                                bw.set(buffer.getData(), start, length);
                            } else {
                                // Escaping happened, we need to copy byte-by-byte.
                                // 1. Set the length first.
                                bw.set(buffer.getData(), start, length);
                                // 2. Reset the pointer.
                                buffer.seek(start);
                                // 3. Copy the data.
                                byte[] rdata = bw.getBytes();
                                for (int i = 0; i < length; i++) {
                                    byte b = buffer.read(invert);
                                    if (b == 1) {
                                        // The last char is an escape char, read the actual char.
                                        // The serialization format escape \0 to \1, and \1 to \2,
                                        // to make sure the string is null-terminated.
                                        b = (byte) (buffer.read(invert) - 1);
                                    }
                                    rdata[i] = b;
                                }
                                // 4. Read the null terminator.
                                byte b = buffer.read(invert);
                                assert (b == 0);
                            }
                            return bw;
                        }
                    case DATE:
                        {
                            DateWritableV2 d = reuse == null ? new DateWritableV2() : (DateWritableV2) reuse;
                            d.set(deserializeInt(buffer, invert));
                            return d;
                        }
                    case TIMESTAMP:
                        TimestampWritableV2 t = (reuse == null ? new TimestampWritableV2() : (TimestampWritableV2) reuse);
                        byte[] bytes = new byte[TimestampWritableV2.BINARY_SORTABLE_LENGTH];
                        for (int i = 0; i < bytes.length; i++) {
                            bytes[i] = buffer.read(invert);
                        }
                        t.setBinarySortable(bytes, 0);
                        return t;
                    case TIMESTAMPLOCALTZ:
                        TimestampLocalTZWritable tstz = (reuse == null ? new TimestampLocalTZWritable() : (TimestampLocalTZWritable) reuse);
                        byte[] data = new byte[TimestampLocalTZWritable.BINARY_SORTABLE_LENGTH];
                        for (int i = 0; i < data.length; i++) {
                            data[i] = buffer.read(invert);
                        }
                        // Across MR process boundary tz is normalized and stored in type
                        // and is not carried in data for each row.
                        tstz.fromBinarySortable(data, 0, ((TimestampLocalTZTypeInfo) type).timeZone());
                        return tstz;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonthWritable i = reuse == null ? new HiveIntervalYearMonthWritable() : (HiveIntervalYearMonthWritable) reuse;
                            i.set(deserializeInt(buffer, invert));
                            return i;
                        }
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTimeWritable i = reuse == null ? new HiveIntervalDayTimeWritable() : (HiveIntervalDayTimeWritable) reuse;
                            long totalSecs = deserializeLong(buffer, invert);
                            int nanos = deserializeInt(buffer, invert);
                            i.set(totalSecs, nanos);
                            return i;
                        }
                    case DECIMAL:
                        {
                            // See serialization of decimal for explanation (below)
                            HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
                            int b = buffer.read(invert) - 1;
                            assert (b == 1 || b == -1 || b == 0);
                            boolean positive = b != -1;
                            int factor = buffer.read(invert) ^ 0x80;
                            for (int i = 0; i < 3; i++) {
                                factor = (factor << 8) + (buffer.read(invert) & 0xff);
                            }
                            if (!positive) {
                                factor = -factor;
                            }
                            int start = buffer.tell();
                            int length = 0;
                            do {
                                b = buffer.read(positive ? invert : !invert);
                                assert (b != 1);
                                if (b == 0) {
                                    // end of digits
                                    break;
                                }
                                length++;
                            } while (true);
                            final byte[] decimalBuffer = new byte[length];
                            buffer.seek(start);
                            for (int i = 0; i < length; ++i) {
                                decimalBuffer[i] = buffer.read(positive ? invert : !invert);
                            }
                            // read the null byte again
                            buffer.read(positive ? invert : !invert);
                            String digits = new String(decimalBuffer, 0, length, decimalCharSet);
                            BigInteger bi = new BigInteger(digits);
                            HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor - length);
                            if (!positive) {
                                bd = bd.negate();
                            }
                            bdw.set(bd);
                            return bdw;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
            {
                ListTypeInfo ltype = (ListTypeInfo) type;
                TypeInfo etype = ltype.getListElementTypeInfo();
                // Create the list if needed
                ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;
                // Read the list
                int size = 0;
                while (true) {
                    int more = buffer.read(invert);
                    if (more == 0) {
                        // \0 to terminate
                        break;
                    }
                    // \1 followed by each element
                    assert (more == 1);
                    if (size == r.size()) {
                        r.add(null);
                    }
                    r.set(size, deserialize(buffer, etype, invert, nullMarker, notNullMarker, r.get(size)));
                    size++;
                }
                // Remove additional elements if the list is reused
                while (r.size() > size) {
                    r.remove(r.size() - 1);
                }
                return r;
            }
        case MAP:
            {
                MapTypeInfo mtype = (MapTypeInfo) type;
                TypeInfo ktype = mtype.getMapKeyTypeInfo();
                TypeInfo vtype = mtype.getMapValueTypeInfo();
                // Create the map if needed
                Map<Object, Object> r;
                if (reuse == null || reuse.getClass() != LinkedHashMap.class) {
                    r = new LinkedHashMap<Object, Object>();
                } else {
                    r = (Map<Object, Object>) reuse;
                    r.clear();
                }
                while (true) {
                    int more = buffer.read(invert);
                    if (more == 0) {
                        // \0 to terminate
                        break;
                    }
                    // \1 followed by each key and then each value
                    assert (more == 1);
                    Object k = deserialize(buffer, ktype, invert, nullMarker, notNullMarker, null);
                    Object v = deserialize(buffer, vtype, invert, nullMarker, notNullMarker, null);
                    r.put(k, v);
                }
                return r;
            }
        case STRUCT:
            {
                StructTypeInfo stype = (StructTypeInfo) type;
                List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
                int size = fieldTypes.size();
                // Create the struct if needed
                ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
                assert (r.size() <= size);
                // Set the size of the struct
                while (r.size() < size) {
                    r.add(null);
                }
                // Read one field by one field
                for (int eid = 0; eid < size; eid++) {
                    r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, nullMarker, notNullMarker, r.get(eid)));
                }
                return r;
            }
        case UNION:
            {
                UnionTypeInfo utype = (UnionTypeInfo) type;
                StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
                // Read the tag
                byte tag = buffer.read(invert);
                r.setTag(tag);
                r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, nullMarker, notNullMarker, null));
                return r;
            }
        default:
            {
                throw new RuntimeException("Unrecognized type: " + type.getCategory());
            }
    }
}
Also used : ArrayList(java.util.ArrayList) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) LinkedHashMap(java.util.LinkedHashMap) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) List(java.util.List) ArrayList(java.util.ArrayList) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StandardUnion(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion) BigInteger(java.math.BigInteger) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 25 with UnionTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo in project hive by apache.

the class AvroObjectInspectorGenerator method createObjectInspectorWorker.

private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException {
    // at deserialization and the object inspector will never see the actual union.
    if (!supportedCategories(ti)) {
        throw new AvroSerdeException("Don't yet support this type: " + ti);
    }
    ObjectInspector result;
    switch(ti.getCategory()) {
        case PRIMITIVE:
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) ti;
            result = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
            break;
        case STRUCT:
            StructTypeInfo sti = (StructTypeInfo) ti;
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>(sti.getAllStructFieldTypeInfos().size());
            for (TypeInfo typeInfo : sti.getAllStructFieldTypeInfos()) {
                ois.add(createObjectInspectorWorker(typeInfo));
            }
            result = ObjectInspectorFactory.getStandardStructObjectInspector(sti.getAllStructFieldNames(), ois);
            break;
        case MAP:
            MapTypeInfo mti = (MapTypeInfo) ti;
            result = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING), createObjectInspectorWorker(mti.getMapValueTypeInfo()));
            break;
        case LIST:
            ListTypeInfo ati = (ListTypeInfo) ti;
            result = ObjectInspectorFactory.getStandardListObjectInspector(createObjectInspectorWorker(ati.getListElementTypeInfo()));
            break;
        case UNION:
            UnionTypeInfo uti = (UnionTypeInfo) ti;
            List<TypeInfo> allUnionObjectTypeInfos = uti.getAllUnionObjectTypeInfos();
            List<ObjectInspector> unionObjectInspectors = new ArrayList<ObjectInspector>(allUnionObjectTypeInfos.size());
            for (TypeInfo typeInfo : allUnionObjectTypeInfos) {
                unionObjectInspectors.add(createObjectInspectorWorker(typeInfo));
            }
            result = ObjectInspectorFactory.getStandardUnionObjectInspector(unionObjectInspectors);
            break;
        default:
            throw new AvroSerdeException("No Hive categories matched: " + ti);
    }
    return result;
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Aggregations

UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)31 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)26 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)26 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)25 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)24 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)23 ArrayList (java.util.ArrayList)16 List (java.util.List)13 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)12 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)9 BytesWritable (org.apache.hadoop.io.BytesWritable)9 Text (org.apache.hadoop.io.Text)9 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)8 HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable)8 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)8 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)8 Map (java.util.Map)7 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)7 Timestamp (org.apache.hadoop.hive.common.type.Timestamp)7 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)7