
Example 31 with ListObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector in project hive by apache.

Class VectorRandomRowSource, method getObjectInspector.

private ObjectInspector getObjectInspector(TypeInfo typeInfo) {
    final ObjectInspector objectInspector;
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                final PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) typeInfo;
                objectInspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveType);
            }
            break;
        case MAP:
            {
                final MapTypeInfo mapType = (MapTypeInfo) typeInfo;
                final MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector(getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo()));
                objectInspector = mapInspector;
            }
            break;
        case LIST:
            {
                final ListTypeInfo listType = (ListTypeInfo) typeInfo;
                final ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector(getObjectInspector(listType.getListElementTypeInfo()));
                objectInspector = listInspector;
            }
            break;
        case STRUCT:
            {
                final StructTypeInfo structType = (StructTypeInfo) typeInfo;
                final List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();
                final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
                for (TypeInfo fieldType : fieldTypes) {
                    fieldInspectors.add(getObjectInspector(fieldType));
                }
                final StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(structType.getAllStructFieldNames(), fieldInspectors);
                objectInspector = structInspector;
            }
            break;
        case UNION:
            {
                final UnionTypeInfo unionType = (UnionTypeInfo) typeInfo;
                final List<TypeInfo> fieldTypes = unionType.getAllUnionObjectTypeInfos();
                final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
                for (TypeInfo fieldType : fieldTypes) {
                    fieldInspectors.add(getObjectInspector(fieldType));
                }
                final UnionObjectInspector unionInspector = ObjectInspectorFactory.getStandardUnionObjectInspector(fieldInspectors);
                objectInspector = unionInspector;
            }
            break;
        default:
            throw new RuntimeException("Unexpected category " + typeInfo.getCategory());
    }
    Preconditions.checkState(objectInspector != null);
    return objectInspector;
}
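For context, here is a minimal sketch of how this recursive factory could be driven from a Hive type string. The class name ListInspectorSketch and the type string "array<int>" are invented for illustration; the factory calls mirror the LIST branch above.

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ListInspectorSketch {
    public static void main(String[] args) {
        // Parse a type string into a TypeInfo, as the row source's callers would.
        ListTypeInfo listTypeInfo =
                (ListTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("array<int>");
        // Mirror the LIST branch above: build the element inspector first...
        ObjectInspector elementOI =
                PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
                        (PrimitiveTypeInfo) listTypeInfo.getListElementTypeInfo());
        // ...then wrap it in a standard list inspector.
        ListObjectInspector listOI =
                ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
        System.out.println(listOI.getTypeName()); // prints: array<int>
    }
}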

Example 32 with ListObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector in project hive by apache.

Class GenericUDFSortArrayByField, method initialize.

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
    returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
    /**
     * This UDF requires a minimum of 2 arguments: the array and a field name.
     */
    if (arguments.length < 2) {
        throw new UDFArgumentLengthException("SORT_ARRAY_BY requires minimum 2 arguments, got " + arguments.length);
    }
    /**
     * The first argument must be an array.
     */
    switch(arguments[0].getCategory()) {
        case LIST:
            listObjectInspector = (ListObjectInspector) arguments[0];
            break;
        default:
            throw new UDFArgumentTypeException(0, "Argument 1 of function SORT_ARRAY_BY must be " + serdeConstants.LIST_TYPE_NAME + ", but " + arguments[0].getTypeName() + " was found.");
    }
    /**
     * The elements of the first argument (the array) must be structs.
     */
    switch(listObjectInspector.getListElementObjectInspector().getCategory()) {
        case STRUCT:
            structObjectInspector = (StructObjectInspector) listObjectInspector.getListElementObjectInspector();
            break;
        default:
            throw new UDFArgumentTypeException(0, "Element[s] of first argument array in function SORT_ARRAY_BY must be " + serdeConstants.STRUCT_TYPE_NAME + ", but " + listObjectInspector.getTypeName() + " was found.");
    }
    /**
     * All sort-field names and the optional sort-order argument must be strings.
     */
    converters = new Converter[arguments.length];
    inputTypes = new PrimitiveCategory[arguments.length];
    fields = new StructField[arguments.length - 1];
    noOfInputFields = arguments.length - 1;
    for (int i = 1; i < arguments.length; i++) {
        checkArgPrimitive(arguments, i);
        checkArgGroups(arguments, i, inputTypes, PrimitiveGrouping.STRING_GROUP);
        if (arguments[i] instanceof ConstantObjectInspector) {
            String fieldName = getConstantStringValue(arguments, i);
            /**
             * Check whether a sort order (ASC, DESC) was specified in the last argument.
             */
            if (i != 1 && (i == arguments.length - 1) && (fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.ASC.name()) || fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.DESC.name()))) {
                sortOrder = SORT_ORDER_TYPE.valueOf(fieldName.trim().toUpperCase());
                noOfInputFields -= 1;
                continue;
            }
            fields[i - 1] = structObjectInspector.getStructFieldRef(getConstantStringValue(arguments, i));
        }
        obtainStringConverter(arguments, i, inputTypes, converters);
    }
    ObjectInspector returnOI = returnOIResolver.get(structObjectInspector);
    converters[0] = ObjectInspectorConverters.getConverter(structObjectInspector, returnOI);
    return ObjectInspectorFactory.getStandardListObjectInspector(structObjectInspector);
}
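A rough, hypothetical harness for the contract initialize() enforces: the first inspector is a list of structs, the second a constant string naming a sort field. The element fields "name" and "price" and the class name are assumptions for this sketch, not part of the Hive source.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArrayByField;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;

public class SortArrayByInitSketch {
    public static void main(String[] args) throws Exception {
        // array<struct<name:string, price:int>> -- the element type must be a struct.
        StructObjectInspector elementOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("name", "price"),
                Arrays.asList(
                        (ObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaIntObjectInspector));
        ObjectInspector arrayOI = ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
        // The sort-field argument must be a constant string.
        ObjectInspector fieldNameOI = PrimitiveObjectInspectorFactory
                .getPrimitiveWritableConstantObjectInspector(
                        TypeInfoFactory.stringTypeInfo, new Text("price"));
        GenericUDFSortArrayByField udf = new GenericUDFSortArrayByField();
        ObjectInspector returnOI = udf.initialize(new ObjectInspector[] { arrayOI, fieldNameOI });
        System.out.println(returnOI.getTypeName()); // a list of the element struct type
    }
}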

Example 33 with ListObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector in project hive by apache.

Class VectorAssignRow, method assignConvertRowColumn.

private void assignConvertRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, ObjectInspector sourceObjectInspector, Writable convertTargetWritable, Object object) {
    final Category targetCategory = targetTypeInfo.getCategory();
    if (targetCategory == null) {
        /*
         * This is a column that we don't want (i.e. not included) -- we are done.
         */
        return;
    }
    if (object == null) {
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
    }
    try {
        switch(targetCategory) {
            case PRIMITIVE:
                final PrimitiveObjectInspector sourcePrimitiveOI = (PrimitiveObjectInspector) sourceObjectInspector;
                final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
                switch(targetPrimitiveCategory) {
                    case VOID:
                        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                        return;
                    case BOOLEAN:
                        ((LongColumnVector) columnVector).vector[batchIndex] = (PrimitiveObjectInspectorUtils.getBoolean(object, sourcePrimitiveOI) ? 1 : 0);
                        break;
                    case BYTE:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getByte(object, sourcePrimitiveOI);
                        break;
                    case SHORT:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getShort(object, sourcePrimitiveOI);
                        break;
                    case INT:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getInt(object, sourcePrimitiveOI);
                        break;
                    case LONG:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getLong(object, sourcePrimitiveOI);
                        break;
                    case TIMESTAMP:
                        {
                            final Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestamp(object, sourcePrimitiveOI);
                            if (timestamp == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((TimestampColumnVector) columnVector).set(batchIndex, timestamp);
                        }
                        break;
                    case DATE:
                        {
                            final Date date = PrimitiveObjectInspectorUtils.getDate(object, sourcePrimitiveOI);
                            if (date == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            DateWritable dateWritable = (DateWritable) convertTargetWritable;
                            if (dateWritable == null) {
                                dateWritable = new DateWritable();
                            }
                            dateWritable.set(date);
                            ((LongColumnVector) columnVector).vector[batchIndex] = dateWritable.getDays();
                        }
                        break;
                    case FLOAT:
                        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getFloat(object, sourcePrimitiveOI);
                        break;
                    case DOUBLE:
                        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getDouble(object, sourcePrimitiveOI);
                        break;
                    case BINARY:
                        {
                            final BytesWritable bytesWritable = PrimitiveObjectInspectorUtils.getBinary(object, sourcePrimitiveOI);
                            if (bytesWritable == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength());
                        }
                        break;
                    case STRING:
                        {
                            final String string = PrimitiveObjectInspectorUtils.getString(object, sourcePrimitiveOI);
                            if (string == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            Text text = (Text) convertTargetWritable;
                            if (text == null) {
                                text = new Text();
                            }
                            text.set(string);
                            ((BytesColumnVector) columnVector).setVal(batchIndex, text.getBytes(), 0, text.getLength());
                        }
                        break;
                    case VARCHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            final HiveVarchar hiveVarchar = PrimitiveObjectInspectorUtils.getHiveVarchar(object, sourcePrimitiveOI);
                            if (hiveVarchar == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // TODO: Do we need maxLength checking?
                            byte[] bytes = hiveVarchar.getValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case CHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            final HiveChar hiveChar = PrimitiveObjectInspectorUtils.getHiveChar(object, sourcePrimitiveOI);
                            if (hiveChar == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // We store CHAR in vector row batch with padding stripped.
                            // TODO: Do we need maxLength checking?
                            final byte[] bytes = hiveChar.getStrippedValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case DECIMAL:
                        {
                            final HiveDecimal hiveDecimal = PrimitiveObjectInspectorUtils.getHiveDecimal(object, sourcePrimitiveOI);
                            if (hiveDecimal == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            if (columnVector instanceof Decimal64ColumnVector) {
                                Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) columnVector;
                                dec64ColVector.set(batchIndex, hiveDecimal);
                                if (dec64ColVector.isNull[batchIndex]) {
                                    return;
                                }
                            } else {
                                ((DecimalColumnVector) columnVector).set(batchIndex, hiveDecimal);
                            }
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            final HiveIntervalYearMonth intervalYearMonth = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(object, sourcePrimitiveOI);
                            if (intervalYearMonth == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((LongColumnVector) columnVector).vector[batchIndex] = intervalYearMonth.getTotalMonths();
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            final HiveIntervalDayTime intervalDayTime = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(object, sourcePrimitiveOI);
                            if (intervalDayTime == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, intervalDayTime);
                        }
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
                }
                break;
            case LIST:
                {
                    final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
                    final ListObjectInspector sourceListOI = (ListObjectInspector) sourceObjectInspector;
                    final ObjectInspector sourceElementOI = sourceListOI.getListElementObjectInspector();
                    final int size = sourceListOI.getListLength(object);
                    final TypeInfo targetElementTypeInfo = ((ListTypeInfo) targetTypeInfo).getListElementTypeInfo();
                    listColumnVector.offsets[batchIndex] = listColumnVector.childCount;
                    listColumnVector.childCount += size;
                    listColumnVector.ensureSize(listColumnVector.childCount, true);
                    listColumnVector.lengths[batchIndex] = size;
                    for (int i = 0; i < size; i++) {
                        final Object element = sourceListOI.getListElement(object, i);
                        final int offset = (int) (listColumnVector.offsets[batchIndex] + i);
                        assignConvertRowColumn(listColumnVector.child, offset, targetElementTypeInfo, sourceElementOI, null, element);
                    }
                }
                break;
            case MAP:
                {
                    final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
                    final MapObjectInspector mapObjectInspector = (MapObjectInspector) sourceObjectInspector;
                    final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
                    final Map<?, ?> map = mapObjectInspector.getMap(object);
                    final int size = map.size();
                    // Assign each entry at its own child offset; writing every entry at
                    // batchIndex would overwrite all but the last key/value pair.
                    mapColumnVector.offsets[batchIndex] = mapColumnVector.childCount;
                    mapColumnVector.childCount += size;
                    mapColumnVector.keys.ensureSize(mapColumnVector.childCount, true);
                    mapColumnVector.values.ensureSize(mapColumnVector.childCount, true);
                    mapColumnVector.lengths[batchIndex] = size;
                    int keyValueIndex = (int) mapColumnVector.offsets[batchIndex];
                    for (Map.Entry<?, ?> entry : map.entrySet()) {
                        assignConvertRowColumn(mapColumnVector.keys, keyValueIndex, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), null, entry.getKey());
                        assignConvertRowColumn(mapColumnVector.values, keyValueIndex, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), null, entry.getValue());
                        keyValueIndex++;
                    }
                }
                break;
            case STRUCT:
                {
                    final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
                    final StructObjectInspector sourceStructOI = (StructObjectInspector) sourceObjectInspector;
                    final List<? extends StructField> sourceFields = sourceStructOI.getAllStructFieldRefs();
                    final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
                    final List<TypeInfo> targetTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
                    final int size = targetTypeInfos.size();
                    for (int i = 0; i < size; i++) {
                        if (i < sourceFields.size()) {
                            final StructField sourceStructField = sourceFields.get(i);
                            final ObjectInspector sourceFieldOI = sourceStructField.getFieldObjectInspector();
                            final Object sourceData = sourceStructOI.getStructFieldData(object, sourceStructField);
                            assignConvertRowColumn(structColumnVector.fields[i], batchIndex, targetTypeInfos.get(i), sourceFieldOI, null, sourceData);
                        } else {
                            final ColumnVector fieldColumnVector = structColumnVector.fields[i];
                            VectorizedBatchUtil.setNullColIsNullValue(fieldColumnVector, batchIndex);
                        }
                    }
                }
                break;
            case UNION:
                {
                    final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
                    final UnionObjectInspector unionObjectInspector = (UnionObjectInspector) sourceObjectInspector;
                    final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
                    final int tag = unionObjectInspector.getTag(object);
                    assignConvertRowColumn(unionColumnVector.fields[tag], batchIndex, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), unionObjectInspector.getObjectInspectors().get(tag), null, unionObjectInspector.getField(tag));
                }
                break;
            default:
                throw new RuntimeException("Category " + targetCategory.name() + " not supported");
        }
    } catch (NumberFormatException e) {
        // Some of the conversion methods throw this exception on numeric parsing errors.
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
    }
    // We always set the null flag to false when there is a value.
    columnVector.isNull[batchIndex] = false;
}
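A minimal sketch of what the PRIMITIVE/INT branch above amounts to, assuming a writable int source object. The inlined null handling stands in for VectorizedBatchUtil.setNullColIsNullValue; the class name and sample value are invented.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.io.IntWritable;

public class AssignIntColumnSketch {
    public static void main(String[] args) {
        LongColumnVector colVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        PrimitiveObjectInspector sourceOI =
                PrimitiveObjectInspectorFactory.writableIntObjectInspector;
        Object value = new IntWritable(42); // the row object being assigned
        int batchIndex = 0;
        if (value == null) {
            // What setNullColIsNullValue boils down to for a simple vector.
            colVector.noNulls = false;
            colVector.isNull[batchIndex] = true;
        } else {
            // INT case: extract via the source inspector, store as a long.
            colVector.vector[batchIndex] =
                    PrimitiveObjectInspectorUtils.getInt(value, sourceOI);
            colVector.isNull[batchIndex] = false;
        }
        System.out.println(colVector.vector[batchIndex]); // prints: 42
    }
}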

Example 34 with ListObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector in project hive by apache.

Class VectorExtractRow, method extractRowColumn.

public Object extractRowColumn(ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {
    if (colVector == null) {
        // The planner will not include unneeded columns for reading, but other
        // parts of execution may ask for them.
        return null;
    }
    final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
    if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
        return null;
    }
    final Category category = typeInfo.getCategory();
    switch(category) {
        case PRIMITIVE:
            {
                final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
                final Writable primitiveWritable = VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
                switch(primitiveCategory) {
                    case VOID:
                        return null;
                    case BOOLEAN:
                        ((BooleanWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex] != 0);
                        return primitiveWritable;
                    case BYTE:
                        ((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case SHORT:
                        ((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INT:
                        ((IntWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case LONG:
                        ((LongWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case TIMESTAMP:
                        ((TimestampWritable) primitiveWritable).set(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex));
                        return primitiveWritable;
                    case DATE:
                        ((DateWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case FLOAT:
                        ((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case DOUBLE:
                        ((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case BINARY:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
                            bytesWritable.set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case STRING:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                            ((Text) primitiveWritable).set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case VARCHAR:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            final int adjustedLength = StringExpr.truncate(bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                            final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
                            hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
                            return primitiveWritable;
                        }
                    case CHAR:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, ((CharTypeInfo) primitiveTypeInfo).getLength());
                            final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
                            hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), ((CharTypeInfo) primitiveTypeInfo).getLength());
                            return primitiveWritable;
                        }
                    case DECIMAL:
                        if (colVector instanceof Decimal64ColumnVector) {
                            Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) colVector;
                            ((HiveDecimalWritable) primitiveWritable).deserialize64(dec64ColVector.vector[adjustedIndex], dec64ColVector.scale);
                        } else {
                            // The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
                            ((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) colVector).vector[adjustedIndex]);
                        }
                        return primitiveWritable;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex));
                        return primitiveWritable;
                    default:
                        throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
                }
            }
        case LIST:
            {
                final ListColumnVector listColumnVector = (ListColumnVector) colVector;
                final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
                final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
                final int offset = (int) listColumnVector.offsets[adjustedIndex];
                final int size = (int) listColumnVector.lengths[adjustedIndex];
                final List<Object> list = new ArrayList<Object>();
                for (int i = 0; i < size; i++) {
                    list.add(extractRowColumn(listColumnVector.child, listTypeInfo.getListElementTypeInfo(), listObjectInspector.getListElementObjectInspector(), offset + i));
                }
                return list;
            }
        case MAP:
            {
                final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
                final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
                final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
                final int offset = (int) mapColumnVector.offsets[adjustedIndex];
                final int size = (int) mapColumnVector.lengths[adjustedIndex];
                final Map<Object, Object> map = new HashMap<Object, Object>();
                for (int i = 0; i < size; i++) {
                    final Object key = extractRowColumn(mapColumnVector.keys, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), offset + i);
                    final Object value = extractRowColumn(mapColumnVector.values, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), offset + i);
                    map.put(key, value);
                }
                return map;
            }
        case STRUCT:
            {
                final StructColumnVector structColumnVector = (StructColumnVector) colVector;
                final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                final StandardStructObjectInspector structInspector = (StandardStructObjectInspector) objectInspector;
                final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                final int size = fieldTypeInfos.size();
                final List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();
                final Object struct = structInspector.create();
                for (int i = 0; i < size; i++) {
                    final StructField structField = structFields.get(i);
                    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
                    final Object value = extractRowColumn(structColumnVector.fields[i], fieldTypeInfo, structField.getFieldObjectInspector(), adjustedIndex);
                    structInspector.setStructFieldData(struct, structField, value);
                }
                return struct;
            }
        case UNION:
            {
                final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
                final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
                final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
                final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
                final Object object = extractRowColumn(unionColumnVector.fields[tag], objectTypeInfos.get(tag), unionInspectors.get(tag), adjustedIndex);
                final StandardUnion standardUnion = new StandardUnion();
                standardUnion.setTag(tag);
                standardUnion.setObject(object);
                return standardUnion;
            }
        default:
            throw new RuntimeException("Category " + category.name() + " not supported");
    }
}
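To make the LIST branch concrete, here is a small hypothetical round trip over a hand-built ListColumnVector. The sample values [7, 8, 9] and the class name are invented, but the offset/length walk is the same as in extractRowColumn.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class ExtractListSketch {
    public static void main(String[] args) {
        LongColumnVector child = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        ListColumnVector listCV =
                new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE, child);
        // Row 0 holds the list [7, 8, 9]: three child slots starting at offset 0.
        child.vector[0] = 7;
        child.vector[1] = 8;
        child.vector[2] = 9;
        listCV.offsets[0] = 0;
        listCV.lengths[0] = 3;
        listCV.childCount = 3;
        // Mirror the LIST branch: walk offset..offset+size over the child vector.
        int row = 0;
        int offset = (int) listCV.offsets[row];
        int size = (int) listCV.lengths[row];
        List<Long> out = new ArrayList<>();
        for (int i = 0; i < size; i++) {
            out.add(child.vector[offset + i]);
        }
        System.out.println(out); // prints: [7, 8, 9]
    }
}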

Example 35 with ListObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector in project hive by apache.

Class HiveMetaStoreUtils, method getFieldsFromDeserializer.

/**
 * @param tableName name of the table
 * @param deserializer deserializer to use
 * @return the list of fields
 * @throws SerDeException if the serde throws an exception
 * @throws MetaException if one of the fields or types in the table is invalid
 */
public static List<FieldSchema> getFieldsFromDeserializer(String tableName, Deserializer deserializer) throws SerDeException, MetaException {
    ObjectInspector oi = deserializer.getObjectInspector();
    String[] names = tableName.split("\\.");
    String last_name = names[names.length - 1];
    for (int i = 1; i < names.length; i++) {
        if (oi instanceof StructObjectInspector) {
            StructObjectInspector soi = (StructObjectInspector) oi;
            StructField sf = soi.getStructFieldRef(names[i]);
            if (sf == null) {
                throw new MetaException("Invalid Field " + names[i]);
            } else {
                oi = sf.getFieldObjectInspector();
            }
        } else if (oi instanceof ListObjectInspector && names[i].equalsIgnoreCase("$elem$")) {
            ListObjectInspector loi = (ListObjectInspector) oi;
            oi = loi.getListElementObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$key$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapKeyObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$value$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapValueObjectInspector();
        } else {
            throw new MetaException("Unknown type for " + names[i]);
        }
    }
    ArrayList<FieldSchema> str_fields = new ArrayList<>();
    // rules on how to recurse the ObjectInspector based on its type
    if (oi.getCategory() != Category.STRUCT) {
        str_fields.add(new FieldSchema(last_name, oi.getTypeName(), FROM_SERIALIZER));
    } else {
        List<? extends StructField> fields = ((StructObjectInspector) oi).getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
            StructField structField = fields.get(i);
            String fieldName = structField.getFieldName();
            String fieldTypeName = structField.getFieldObjectInspector().getTypeName();
            String fieldComment = determineFieldComment(structField.getFieldComment());
            str_fields.add(new FieldSchema(fieldName, fieldTypeName, fieldComment));
        }
    }
    return str_fields;
}
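The dotted-name loop is easiest to see with a concrete inspector. In this hypothetical sketch, the path "t.$elem$" descends from an array<struct<id:bigint>> inspector to its element inspector; the table alias and field name are invented.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class DollarElemSketch {
    public static void main(String[] args) {
        // An inspector for array<struct<id:bigint>>.
        ObjectInspector oi = ObjectInspectorFactory.getStandardListObjectInspector(
                ObjectInspectorFactory.getStandardStructObjectInspector(
                        Arrays.asList("id"),
                        Arrays.asList((ObjectInspector)
                                PrimitiveObjectInspectorFactory.javaLongObjectInspector)));
        // "$elem$" steps from the list inspector to its element inspector,
        // exactly as the loop in getFieldsFromDeserializer does.
        String[] names = "t.$elem$".split("\\.");
        for (int i = 1; i < names.length; i++) {
            if (oi instanceof ListObjectInspector && names[i].equalsIgnoreCase("$elem$")) {
                oi = ((ListObjectInspector) oi).getListElementObjectInspector();
            }
        }
        System.out.println(oi.getTypeName()); // prints: struct<id:bigint>
    }
}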

Aggregations

ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 57 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 43 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 42 usages
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 35 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 29 usages
ArrayList (java.util.ArrayList): 28 usages
List (java.util.List): 18 usages
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 18 usages
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 18 usages
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 17 usages
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 17 usages
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 17 usages
ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector): 16 usages
FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector): 16 usages
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 16 usages
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 15 usages
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 15 usages
BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector): 15 usages
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 15 usages
DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector): 14 usages