Search in sources :

Example 76 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class SerdeRandomRowSource method chooseSchema.

/**
 * Randomly chooses the schema used for random row generation: the number of columns,
 * each column's (possibly decorated/nested) type name, and the matching object inspectors.
 *
 * Populates typeNames, categories, typeInfos, objectInspectorList, primitiveCategories,
 * primitiveTypeInfos, primitiveObjectInspectorList, rowStructObjectInspector and alphabets.
 *
 * @param supportedTypes which type universe to draw from (PRIMITIVE, ALL_EXCEPT_MAP, ALL)
 * @param maxComplexDepth maximum nesting depth allowed for complex (LIST/MAP/STRUCT/UNION) types
 */
private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) {
    // Tracks already-used type numbers when every supported type must appear exactly once.
    HashSet<Integer> hashSet = null;
    final boolean allTypes;
    // ~1% of the time generate a single-column schema with one random type.
    final boolean onlyOne = (r.nextInt(100) == 7);
    if (onlyOne) {
        columnCount = 1;
        allTypes = false;
    } else {
        allTypes = r.nextBoolean();
        if (allTypes) {
            switch(supportedTypes) {
                case ALL:
                    columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
                    break;
                case ALL_EXCEPT_MAP:
                    // One fewer column: the MAP complex type is excluded.
                    columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
                    break;
                case PRIMITIVE:
                    columnCount = possibleHivePrimitiveTypeNames.length;
                    break;
            }
            hashSet = new HashSet<Integer>();
        } else {
            // Random schema width between 1 and 20 columns.
            columnCount = 1 + r.nextInt(20);
        }
    }
    typeNames = new ArrayList<String>(columnCount);
    categories = new Category[columnCount];
    typeInfos = new TypeInfo[columnCount];
    objectInspectorList = new ArrayList<ObjectInspector>(columnCount);
    primitiveCategories = new PrimitiveCategory[columnCount];
    primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
    primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
    final List<String> columnNames = new ArrayList<String>(columnCount);
    for (int c = 0; c < columnCount; c++) {
        columnNames.add(String.format("col%d", c));
        String typeName;
        if (onlyOne) {
            typeName = getRandomTypeName(supportedTypes);
        } else {
            int typeNum;
            if (allTypes) {
                int maxTypeNum = 0;
                switch(supportedTypes) {
                    case PRIMITIVE:
                        maxTypeNum = possibleHivePrimitiveTypeNames.length;
                        break;
                    case ALL_EXCEPT_MAP:
                        maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
                        break;
                    case ALL:
                        maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
                        break;
                }
                // Rejection-sample until an unused type number is found so that each
                // supported type is assigned to exactly one column.
                while (true) {
                    typeNum = r.nextInt(maxTypeNum);
                    // Integer.valueOf uses the cache and avoids the deprecated Integer(int) constructor.
                    final Integer typeNumInteger = Integer.valueOf(typeNum);
                    if (!hashSet.contains(typeNumInteger)) {
                        hashSet.add(typeNumInteger);
                        break;
                    }
                }
            } else {
                // 90% primitive, 10% complex (when complex types are allowed at all).
                if (supportedTypes == SupportedTypes.PRIMITIVE || r.nextInt(10) != 0) {
                    typeNum = r.nextInt(possibleHivePrimitiveTypeNames.length);
                } else {
                    typeNum = possibleHivePrimitiveTypeNames.length + r.nextInt(possibleHiveComplexTypeNames.length);
                    if (supportedTypes == SupportedTypes.ALL_EXCEPT_MAP) {
                        // Shift away from the excluded MAP slot.
                        typeNum--;
                    }
                }
            }
            if (typeNum < possibleHivePrimitiveTypeNames.length) {
                typeName = possibleHivePrimitiveTypeNames[typeNum];
            } else {
                typeName = possibleHiveComplexTypeNames[typeNum - possibleHivePrimitiveTypeNames.length];
            }
        }
        // Decorate the base type name (e.g. lengths/precision, nested complex element types)
        // up to maxComplexDepth.
        final String decoratedTypeName = getDecoratedTypeName(typeName, supportedTypes, 0, maxComplexDepth);
        final TypeInfo typeInfo;
        try {
            typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(decoratedTypeName);
        } catch (Exception e) {
            // Chain the original exception as the cause instead of flattening it into the message.
            throw new RuntimeException("Cannot convert type name " + decoratedTypeName + " to a type", e);
        }
        typeInfos[c] = typeInfo;
        final Category category = typeInfo.getCategory();
        categories[c] = category;
        ObjectInspector objectInspector = getObjectInspector(typeInfo);
        switch(category) {
            case PRIMITIVE:
                {
                    final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                    final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
                    // Primitives always use the writable object inspector.
                    objectInspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo);
                    primitiveTypeInfos[c] = primitiveTypeInfo;
                    primitiveCategories[c] = primitiveCategory;
                    primitiveObjectInspectorList.add(objectInspector);
                }
                break;
            case LIST:
            case MAP:
            case STRUCT:
            case UNION:
                // Keep primitiveObjectInspectorList index-aligned with the columns.
                primitiveObjectInspectorList.add(null);
                break;
            default:
                throw new RuntimeException("Unexpected category " + category);
        }
        objectInspectorList.add(objectInspector);
        typeNames.add(decoratedTypeName);
    }
    rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, objectInspectorList);
    alphabets = new String[columnCount];
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) WritableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector) WritableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableHiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) StandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector) 
WritableHiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) StandardUnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ArrayList(java.util.ArrayList) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) HashSet(java.util.HashSet)

Example 77 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class VerifyFast method serializeWrite.

/**
 * Recursively serializes {@code object} (in Hadoop writable / standard-object form)
 * through the streaming {@code serializeWrite} interface, dispatching on the
 * category of {@code typeInfo}.
 *
 * A null object at any level is written via {@code writeNull()}. The exact ordering of
 * begin/separate/finish calls is part of the SerializeWrite contract and must not be
 * reordered.
 *
 * @param serializeWrite the streaming serializer to emit into
 * @param typeInfo the Hive type of {@code object}
 * @param object the value to serialize; expected to be the writable/standard
 *               representation matching {@code typeInfo}
 * @throws IOException if the underlying serializer fails
 */
public static void serializeWrite(SerializeWrite serializeWrite, TypeInfo typeInfo, Object object) throws IOException {
    if (object == null) {
        serializeWrite.writeNull();
        return;
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                // Each primitive is unwrapped from its writable and emitted with the
                // type-specific write method.
                PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                switch(primitiveTypeInfo.getPrimitiveCategory()) {
                    case BOOLEAN:
                        {
                            boolean value = ((BooleanWritable) object).get();
                            serializeWrite.writeBoolean(value);
                        }
                        break;
                    case BYTE:
                        {
                            byte value = ((ByteWritable) object).get();
                            serializeWrite.writeByte(value);
                        }
                        break;
                    case SHORT:
                        {
                            short value = ((ShortWritable) object).get();
                            serializeWrite.writeShort(value);
                        }
                        break;
                    case INT:
                        {
                            int value = ((IntWritable) object).get();
                            serializeWrite.writeInt(value);
                        }
                        break;
                    case LONG:
                        {
                            long value = ((LongWritable) object).get();
                            serializeWrite.writeLong(value);
                        }
                        break;
                    case FLOAT:
                        {
                            float value = ((FloatWritable) object).get();
                            serializeWrite.writeFloat(value);
                        }
                        break;
                    case DOUBLE:
                        {
                            double value = ((DoubleWritable) object).get();
                            serializeWrite.writeDouble(value);
                        }
                        break;
                    case STRING:
                        {
                            // Pass the backing byte array directly; length comes from the
                            // bytes, not Text.getLength() -- the array is sized exactly here.
                            Text value = (Text) object;
                            byte[] stringBytes = value.getBytes();
                            int stringLength = stringBytes.length;
                            serializeWrite.writeString(stringBytes, 0, stringLength);
                        }
                        break;
                    case CHAR:
                        {
                            HiveChar value = ((HiveCharWritable) object).getHiveChar();
                            serializeWrite.writeHiveChar(value);
                        }
                        break;
                    case VARCHAR:
                        {
                            HiveVarchar value = ((HiveVarcharWritable) object).getHiveVarchar();
                            serializeWrite.writeHiveVarchar(value);
                        }
                        break;
                    case DECIMAL:
                        {
                            // The declared scale from the type (not the value's scale) is
                            // what the serializer needs.
                            HiveDecimal value = ((HiveDecimalWritable) object).getHiveDecimal();
                            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                            serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
                        }
                        break;
                    case DATE:
                        {
                            Date value = ((DateWritable) object).get();
                            serializeWrite.writeDate(value);
                        }
                        break;
                    case TIMESTAMP:
                        {
                            Timestamp value = ((TimestampWritable) object).getTimestamp();
                            serializeWrite.writeTimestamp(value);
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
                            serializeWrite.writeHiveIntervalYearMonth(value);
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
                            serializeWrite.writeHiveIntervalDayTime(value);
                        }
                        break;
                    case BINARY:
                        {
                            // Unlike STRING above, BytesWritable's backing array may be
                            // larger than the data, so use getLength() here.
                            BytesWritable byteWritable = (BytesWritable) object;
                            byte[] binaryBytes = byteWritable.getBytes();
                            int length = byteWritable.getLength();
                            serializeWrite.writeBinary(binaryBytes, 0, length);
                        }
                        break;
                    default:
                        throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
                }
            }
            break;
        case LIST:
            {
                // Recurse per element, emitting a separator between elements (not before
                // the first, not after the last).
                ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
                TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
                ArrayList<Object> elements = (ArrayList<Object>) object;
                serializeWrite.beginList(elements);
                boolean isFirst = true;
                for (Object elementObject : elements) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateList();
                    }
                    if (elementObject == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, elementTypeInfo, elementObject);
                    }
                }
                serializeWrite.finishList();
            }
            break;
        case MAP:
            {
                // Each entry is key, separateKey(), value; entries are separated by
                // separateKeyValuePair().
                MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
                TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
                TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
                HashMap<Object, Object> hashMap = (HashMap<Object, Object>) object;
                serializeWrite.beginMap(hashMap);
                boolean isFirst = true;
                for (Entry<Object, Object> entry : hashMap.entrySet()) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateKeyValuePair();
                    }
                    if (entry.getKey() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, keyTypeInfo, entry.getKey());
                    }
                    serializeWrite.separateKey();
                    if (entry.getValue() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, valueTypeInfo, entry.getValue());
                    }
                }
                serializeWrite.finishMap();
            }
            break;
        case STRUCT:
            {
                // Field values are positional and must match the struct's field type list.
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                ArrayList<Object> fieldValues = (ArrayList<Object>) object;
                final int size = fieldValues.size();
                serializeWrite.beginStruct(fieldValues);
                boolean isFirst = true;
                for (int i = 0; i < size; i++) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateStruct();
                    }
                    serializeWrite(serializeWrite, fieldTypeInfos.get(i), fieldValues.get(i));
                }
                serializeWrite.finishStruct();
            }
            break;
        case UNION:
            {
                // The union's tag selects which alternative's TypeInfo governs the payload.
                UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                final int size = fieldTypeInfos.size();
                StandardUnion standardUnion = (StandardUnion) object;
                byte tag = standardUnion.getTag();
                serializeWrite.beginUnion(tag);
                serializeWrite(serializeWrite, fieldTypeInfos.get(tag), standardUnion.getObject());
                serializeWrite.finishUnion();
            }
            break;
        default:
            throw new Error("Unknown category " + typeInfo.getCategory().name());
    }
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Entry(java.util.Map.Entry) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ArrayList(java.util.ArrayList) List(java.util.List) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) 
VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StandardUnion(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 78 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class LazyBinaryUtils method checkObjectByteInfo.

/**
 * Check a particular field and set its size and offset in bytes based on the
 * field type and the bytes arrays.
 *
 * For void, boolean, byte, short, int, long, float and double, there is no
 * offset and the size is fixed. For string, map, list, struct, the first four
 * bytes are used to store the size. So the offset is 4 and the size is
 * computed by concatenating the first four bytes together. The first four bytes
 * are defined with respect to the offset in the bytes arrays.
 * For timestamp, if the first bit is 0, the record length is 4, otherwise
 * a VInt begins at the 5th byte and its length is added to 4.
 *
 * @param objectInspector
 *          object inspector of the field
 * @param bytes
 *          bytes arrays store the table row
 * @param offset
 *          offset of this field
 * @param recordInfo
 *          modify this byteinfo object and return it
 */
public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo, VInt vInt) {
    Category category = objectInspector.getCategory();
    switch(category) {
        case PRIMITIVE:
            PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
            switch(primitiveCategory) {
                case VOID:
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = 0;
                    break;
                case BOOLEAN:
                case BYTE:
                    // Single fixed byte.
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = 1;
                    break;
                case SHORT:
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = 2;
                    break;
                case FLOAT:
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = 4;
                    break;
                case DOUBLE:
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = 8;
                    break;
                case INT:
                case LONG:
                case DATE:
                case INTERVAL_YEAR_MONTH:
                    // Stored as a single vint/vlong; size is derivable from the first byte.
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
                    break;
                case STRING:
                case CHAR:
                case VARCHAR:
                case BINARY:
                    // Variable-length data prefixed by a vint length (instead of 4 fixed bytes):
                    // the data starts after the vint and is vInt.value bytes long.
                    LazyBinaryUtils.readVInt(bytes, offset, vInt);
                    recordInfo.elementOffset = vInt.length;
                    recordInfo.elementSize = vInt.value;
                    break;
                case TIMESTAMP:
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset);
                    break;
                case TIMESTAMPLOCALTZ:
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = TimestampLocalTZWritable.getTotalLength(bytes, offset);
                    break;
                case INTERVAL_DAY_TIME:
                    // Two consecutive varints: seconds then nanos.
                    recordInfo.elementOffset = 0;
                    int secondsSize = WritableUtils.decodeVIntSize(bytes[offset]);
                    int nanosSize = WritableUtils.decodeVIntSize(bytes[offset + secondsSize]);
                    recordInfo.elementSize = secondsSize + nanosSize;
                    break;
                case DECIMAL:
                    // Layout: vint scale, then vint byte-length, then that many magnitude bytes.
                    LazyBinaryUtils.readVInt(bytes, offset, vInt);
                    recordInfo.elementOffset = 0;
                    recordInfo.elementSize = vInt.length;
                    LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt);
                    recordInfo.elementSize += vInt.length + vInt.value;
                    break;
                default:
                    {
                        throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
                    }
            }
            break;
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
            // Complex types store their total size in the first 4 bytes.
            recordInfo.elementOffset = 4;
            recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
            break;
        default:
            {
                throw new RuntimeException("Unrecognized non-primitive type: " + category);
            }
    }
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)

Example 79 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class LazyBinaryDeserializeRead method readPrimitive.

/**
 * Reads the next primitive value of the given field from the lazy-binary byte stream,
 * advancing {@code offset} and storing the result in the matching current* member
 * (e.g. currentInt, currentBytes, currentTimestampWritable).
 *
 * @param field the field descriptor supplying the primitive category and type info
 * @return true if a value was read; false only for a DECIMAL that turns out to be
 *         null after precision/scale enforcement
 * @throws EOFException if the encoded value would run past {@code end}
 */
private boolean readPrimitive(Field field) throws IOException {
    final PrimitiveCategory primitiveCategory = field.primitiveCategory;
    final TypeInfo typeInfo = field.typeInfo;
    switch(primitiveCategory) {
        case BOOLEAN:
            // No check needed for single byte read.
            currentBoolean = (bytes[offset++] != 0);
            break;
        case BYTE:
            // No check needed for single byte read.
            currentByte = bytes[offset++];
            break;
        case SHORT:
            // Last item -- ok to be at end.
            if (offset + 2 > end) {
                throw new EOFException();
            }
            currentShort = LazyBinaryUtils.byteArrayToShort(bytes, offset);
            offset += 2;
            break;
        case INT:
            // Parse the first byte of a vint/vlong to determine the number of bytes.
            if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
                throw new EOFException();
            }
            LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
            offset += tempVInt.length;
            currentInt = tempVInt.value;
            break;
        case LONG:
            // Parse the first byte of a vint/vlong to determine the number of bytes.
            if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
                throw new EOFException();
            }
            LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
            offset += tempVLong.length;
            currentLong = tempVLong.value;
            break;
        case FLOAT:
            // Last item -- ok to be at end.
            if (offset + 4 > end) {
                throw new EOFException();
            }
            currentFloat = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset));
            offset += 4;
            break;
        case DOUBLE:
            // Last item -- ok to be at end.
            if (offset + 8 > end) {
                throw new EOFException();
            }
            currentDouble = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset));
            offset += 8;
            break;
        case BINARY:
        case STRING:
        case CHAR:
        case VARCHAR:
            {
                // Variable-length bytes: vint length prefix, then the raw data.
                // currentBytes points into the shared buffer (no copy is made).
                // Parse the first byte of a vint/vlong to determine the number of bytes.
                if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
                    throw new EOFException();
                }
                LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
                offset += tempVInt.length;
                int saveStart = offset;
                int length = tempVInt.value;
                offset += length;
                // Last item -- ok to be at end.
                if (offset > end) {
                    throw new EOFException();
                }
                currentBytes = bytes;
                currentBytesStart = saveStart;
                currentBytesLength = length;
            }
            break;
        case DATE:
            // Parse the first byte of a vint/vlong to determine the number of bytes.
            if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
                throw new EOFException();
            }
            LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
            offset += tempVInt.length;
            currentDateWritable.set(tempVInt.value);
            break;
        case TIMESTAMP:
            {
                // Total length is self-describing (4 bytes, plus an optional vint part).
                int length = TimestampWritable.getTotalLength(bytes, offset);
                int saveStart = offset;
                offset += length;
                // Last item -- ok to be at end.
                if (offset > end) {
                    throw new EOFException();
                }
                currentTimestampWritable.set(bytes, saveStart);
            }
            break;
        case INTERVAL_YEAR_MONTH:
            // Parse the first byte of a vint/vlong to determine the number of bytes.
            if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
                throw new EOFException();
            }
            LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
            offset += tempVInt.length;
            currentHiveIntervalYearMonthWritable.set(tempVInt.value);
            break;
        case INTERVAL_DAY_TIME:
            // NOTE(review): this first check uses >= (stricter than the > used elsewhere),
            // presumably because the nanos vint must still follow the seconds vlong --
            // confirm intent before "fixing".
            // Parse the first byte of a vint/vlong to determine the number of bytes.
            if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
                throw new EOFException();
            }
            LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
            offset += tempVLong.length;
            // Parse the first byte of a vint/vlong to determine the number of bytes.
            if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
                throw new EOFException();
            }
            LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
            offset += tempVInt.length;
            currentHiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value);
            break;
        case DECIMAL:
            {
                // Layout: vint scale, vint byte-length, then that many magnitude bytes.
                // NOTE(review): >= here (vs > elsewhere) looks deliberate -- more data
                // must follow the scale vint -- confirm against the writer.
                // Parse the first byte of a vint/vlong to determine the number of bytes.
                if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
                    throw new EOFException();
                }
                LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
                offset += tempVInt.length;
                int readScale = tempVInt.value;
                // Parse the first byte of a vint/vlong to determine the number of bytes.
                if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
                    throw new EOFException();
                }
                LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
                offset += tempVInt.length;
                int saveStart = offset;
                offset += tempVInt.value;
                // Last item -- ok to be at end.
                if (offset > end) {
                    throw new EOFException();
                }
                int length = offset - saveStart;
                // scale = 2, length = 6, value = -6065716379.11
                // \002\006\255\114\197\131\083\105
                // \255\114\197\131\083\105
                currentHiveDecimalWritable.setFromBigIntegerBytesAndScale(bytes, saveStart, length, readScale);
                boolean decimalIsNull = !currentHiveDecimalWritable.isSet();
                if (!decimalIsNull) {
                    // Enforce the declared precision/scale; values that do not fit become null.
                    final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
                    final int precision = decimalTypeInfo.getPrecision();
                    final int scale = decimalTypeInfo.getScale();
                    decimalIsNull = !currentHiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale);
                }
                if (decimalIsNull) {
                    // Treat an unrepresentable decimal as a null field value.
                    return false;
                }
            }
            break;
        default:
            throw new Error("Unexpected primitive category " + primitiveCategory.name());
    }
    return true;
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) EOFException(java.io.EOFException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 80 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class LazySimpleSerDe method serialize.

/**
 * Serialize the row into the StringBuilder.
 *
 * <p>Recursively walks the object via its ObjectInspector. Nested levels use
 * successive entries of {@code separators}; a null object (or null container)
 * is written as {@code nullSequence}.
 *
 * @param out
 *          The StringBuilder to store the serialized data.
 * @param obj
 *          The object for the current field.
 * @param objInspector
 *          The ObjectInspector for the current Object.
 * @param separators
 *          The separators array.
 * @param level
 *          The current level of separator.
 * @param nullSequence
 *          The byte sequence representing the NULL value.
 * @param escaped
 *          Whether we need to escape the data when writing out
 * @param escapeChar
 *          Which char to use as the escape char, e.g. '\\'
 * @param needsEscape
 *          Which byte needs to be escaped for 256 bytes.
 * @throws IOException
 * @throws SerDeException
 */
public static void serialize(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException, SerDeException {
    if (obj == null) {
        // A null field at any level is rendered as the configured null sequence.
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        return;
    }
    char separator;
    List<?> list;
    switch(objInspector.getCategory()) {
        case PRIMITIVE:
            LazyUtils.writePrimitiveUTF8(out, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape);
            return;
        case LIST:
            separator = (char) LazyUtils.getSeparator(separators, level);
            ListObjectInspector loi = (ListObjectInspector) objInspector;
            list = loi.getList(obj);
            ObjectInspector eoi = loi.getListElementObjectInspector();
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                // Elements recurse at level + 1 so nested containers pick the next separator.
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serialize(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case MAP:
            separator = (char) LazyUtils.getSeparator(separators, level);
            // Maps consume two separator levels: one between entries, one between key and value.
            char keyValueSeparator = (char) LazyUtils.getSeparator(separators, level + 1);
            MapObjectInspector moi = (MapObjectInspector) objInspector;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();
            Map<?, ?> map = moi.getMap(obj);
            if (map == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                boolean first = true;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    if (first) {
                        first = false;
                    } else {
                        out.write(separator);
                    }
                    serialize(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                    out.write(keyValueSeparator);
                    serialize(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case STRUCT:
            separator = (char) LazyUtils.getSeparator(separators, level);
            StructObjectInspector soi = (StructObjectInspector) objInspector;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            list = soi.getStructFieldsDataAsList(obj);
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serialize(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case UNION:
            separator = (char) LazyUtils.getSeparator(separators, level);
            UnionObjectInspector uoi = (UnionObjectInspector) objInspector;
            List<? extends ObjectInspector> ois = uoi.getObjectInspectors();
            if (ois == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                // Read the tag once; it selects which alternative inspector to recurse with.
                byte tag = uoi.getTag(obj);
                // Byte.valueOf avoids the deprecated boxing constructor and reuses cached instances.
                LazyUtils.writePrimitiveUTF8(out, Byte.valueOf(tag), PrimitiveObjectInspectorFactory.javaByteObjectInspector, escaped, escapeChar, needsEscape);
                out.write(separator);
                serialize(out, uoi.getField(obj), ois.get(tag), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
            }
            return;
        default:
            break;
    }
    throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) Map(java.util.Map) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)44 Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)33 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)31 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)27 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)26 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)26 ArrayList (java.util.ArrayList)22 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)20 BytesWritable (org.apache.hadoop.io.BytesWritable)19 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)18 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)18 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)18 Text (org.apache.hadoop.io.Text)18 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)17 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)17 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)16 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)16 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)16 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)16 IntWritable (org.apache.hadoop.io.IntWritable)16