Search in sources :

Example 36 with PrimitiveCategory

use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

The initialize method of the GenericUDFLikeAll class.

/**
 * Validates the operands of the like-all operator and builds one converter per operand.
 *
 * <p>Every operand goes through {@code checkArgPrimitive} and {@code checkArgGroups}
 * (string group or void group). For operands after the first, the method also records
 * whether a constant operand carries a null value and whether any operand is not a constant.
 *
 * @param arguments object inspectors of the operands; at least two are required
 * @return the writable boolean object inspector
 * @throws UDFArgumentException when fewer than two operands are supplied (and, presumably,
 *         when one of the argument checks rejects an operand)
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    final int argCount = arguments.length;
    if (argCount < 2) {
        throw new UDFArgumentLengthException("The like all operator requires at least one pattern for matching, got " + (argCount - 1));
    }
    inputTypes = new PrimitiveCategory[argCount];
    converters = new Converter[argCount];
    // Each operand is expected to be a string or a null.
    for (int pos = 0; pos < argCount; pos++) {
        checkArgPrimitive(arguments, pos);
        checkArgGroups(arguments, pos, inputTypes, PrimitiveGrouping.STRING_GROUP, PrimitiveGrouping.VOID_GROUP);
        final ObjectInspector operandOI = arguments[pos];
        final PrimitiveCategory category = ((PrimitiveObjectInspector) operandOI).getPrimitiveCategory();
        // The first operand is exempt from the constant / null bookkeeping below.
        if (pos != 0) {
            if (operandOI instanceof ConstantObjectInspector) {
                final Object constant = ((ConstantObjectInspector) operandOI).getWritableConstantValue();
                if (constant == null) {
                    isConstantNullPatternContain = true;
                }
            } else {
                isAllPatternsConstant = false;
            }
        }
        converters[pos] = ObjectInspectorConverters.getConverter(operandOI, getOutputOI(category));
        inputTypes[pos] = category;
    }
    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
}
Also used : UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)

Example 37 with PrimitiveCategory

use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

The assignConvertRowColumn method of the VectorAssignRow class.

/**
 * Converts one source value to the target type and stores it in {@code columnVector}
 * at {@code batchIndex}.
 *
 * <p>Primitive targets are read from {@code object} through
 * {@code PrimitiveObjectInspectorUtils}; LIST, MAP, STRUCT and UNION targets recurse into
 * the child vectors. The entry is marked null (via
 * {@code VectorizedBatchUtil.setNullColIsNullValue}) when {@code object} is null, when the
 * target is VOID, when a conversion yields null, or when a conversion throws
 * {@link NumberFormatException}. Otherwise {@code isNull[batchIndex]} is cleared.
 *
 * @param columnVector target vector; cast to the concrete vector class implied by the target type
 * @param batchIndex index of the entry to write
 * @param targetTypeInfo type to convert to; a null category means the column is not wanted
 * @param sourceObjectInspector inspector describing {@code object}
 * @param convertTargetWritable optional scratch writable reused for DATE and STRING targets;
 *        a fresh one is allocated when it is null
 * @param object the source value, may be null
 * @throws RuntimeException if the target category or primitive category is not supported
 */
private void assignConvertRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, ObjectInspector sourceObjectInspector, Writable convertTargetWritable, Object object) {
    final Category targetCategory = targetTypeInfo.getCategory();
    if (targetCategory == null) {
        /*
       * This is a column that we don't want (i.e. not included) -- we are done.
       */
        return;
    }
    if (object == null) {
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
    }
    try {
        switch(targetCategory) {
            case PRIMITIVE:
                final PrimitiveObjectInspector sourcePrimitiveOI = (PrimitiveObjectInspector) sourceObjectInspector;
                final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
                switch(targetPrimitiveCategory) {
                    case VOID:
                        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                        return;
                    case BOOLEAN:
                        ((LongColumnVector) columnVector).vector[batchIndex] = (PrimitiveObjectInspectorUtils.getBoolean(object, sourcePrimitiveOI) ? 1 : 0);
                        break;
                    case BYTE:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getByte(object, sourcePrimitiveOI);
                        break;
                    case SHORT:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getShort(object, sourcePrimitiveOI);
                        break;
                    case INT:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getInt(object, sourcePrimitiveOI);
                        break;
                    case LONG:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getLong(object, sourcePrimitiveOI);
                        break;
                    case TIMESTAMP:
                        {
                            final Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestamp(object, sourcePrimitiveOI);
                            if (timestamp == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((TimestampColumnVector) columnVector).set(batchIndex, timestamp);
                        }
                        break;
                    case DATE:
                        {
                            final Date date = PrimitiveObjectInspectorUtils.getDate(object, sourcePrimitiveOI);
                            if (date == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // Reuse the caller's scratch writable when one was supplied.
                            DateWritable dateWritable = (DateWritable) convertTargetWritable;
                            if (dateWritable == null) {
                                dateWritable = new DateWritable();
                            }
                            dateWritable.set(date);
                            ((LongColumnVector) columnVector).vector[batchIndex] = dateWritable.getDays();
                        }
                        break;
                    case FLOAT:
                        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getFloat(object, sourcePrimitiveOI);
                        break;
                    case DOUBLE:
                        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getDouble(object, sourcePrimitiveOI);
                        break;
                    case BINARY:
                        {
                            final BytesWritable bytesWritable = PrimitiveObjectInspectorUtils.getBinary(object, sourcePrimitiveOI);
                            if (bytesWritable == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength());
                        }
                        break;
                    case STRING:
                        {
                            final String string = PrimitiveObjectInspectorUtils.getString(object, sourcePrimitiveOI);
                            if (string == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // Reuse the caller's scratch writable when one was supplied.
                            Text text = (Text) convertTargetWritable;
                            if (text == null) {
                                text = new Text();
                            }
                            text.set(string);
                            ((BytesColumnVector) columnVector).setVal(batchIndex, text.getBytes(), 0, text.getLength());
                        }
                        break;
                    case VARCHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            final HiveVarchar hiveVarchar = PrimitiveObjectInspectorUtils.getHiveVarchar(object, sourcePrimitiveOI);
                            if (hiveVarchar == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // TODO: Do we need maxLength checking?
                            // Encode explicitly as UTF-8 rather than with the platform default charset.
                            final byte[] bytes = hiveVarchar.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8);
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case CHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            final HiveChar hiveChar = PrimitiveObjectInspectorUtils.getHiveChar(object, sourcePrimitiveOI);
                            if (hiveChar == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // We store CHAR in vector row batch with padding stripped.
                            // TODO: Do we need maxLength checking?
                            // Encode explicitly as UTF-8 rather than with the platform default charset.
                            final byte[] bytes = hiveChar.getStrippedValue().getBytes(java.nio.charset.StandardCharsets.UTF_8);
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case DECIMAL:
                        {
                            final HiveDecimal hiveDecimal = PrimitiveObjectInspectorUtils.getHiveDecimal(object, sourcePrimitiveOI);
                            if (hiveDecimal == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            if (columnVector instanceof Decimal64ColumnVector) {
                                Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) columnVector;
                                dec64ColVector.set(batchIndex, hiveDecimal);
                                // If set() left the entry flagged null, return now so the flag
                                // is not cleared at the end of this method.
                                if (dec64ColVector.isNull[batchIndex]) {
                                    return;
                                }
                            } else {
                                ((DecimalColumnVector) columnVector).set(batchIndex, hiveDecimal);
                            }
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            final HiveIntervalYearMonth intervalYearMonth = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(object, sourcePrimitiveOI);
                            if (intervalYearMonth == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((LongColumnVector) columnVector).vector[batchIndex] = intervalYearMonth.getTotalMonths();
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            final HiveIntervalDayTime intervalDayTime = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(object, sourcePrimitiveOI);
                            if (intervalDayTime == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, intervalDayTime);
                        }
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
                }
                break;
            case LIST:
                {
                    final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
                    final ListObjectInspector sourceListOI = (ListObjectInspector) sourceObjectInspector;
                    final ObjectInspector sourceElementOI = sourceListOI.getListElementObjectInspector();
                    final int size = sourceListOI.getListLength(object);
                    final TypeInfo targetElementTypeInfo = ((ListTypeInfo) targetTypeInfo).getListElementTypeInfo();
                    // Append this row's elements at the end of the child vector.
                    listColumnVector.offsets[batchIndex] = listColumnVector.childCount;
                    listColumnVector.childCount += size;
                    listColumnVector.ensureSize(listColumnVector.childCount, true);
                    // The elements are written into the child vector, so it must be able to
                    // hold childCount entries as well.
                    listColumnVector.child.ensureSize(listColumnVector.childCount, true);
                    listColumnVector.lengths[batchIndex] = size;
                    for (int i = 0; i < size; i++) {
                        final Object element = sourceListOI.getListElement(object, i);
                        final int offset = (int) (listColumnVector.offsets[batchIndex] + i);
                        assignConvertRowColumn(listColumnVector.child, offset, targetElementTypeInfo, sourceElementOI, null, element);
                    }
                }
                break;
            case MAP:
                {
                    final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
                    final MapObjectInspector mapObjectInspector = (MapObjectInspector) sourceObjectInspector;
                    final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
                    final TypeInfo targetKeyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
                    final TypeInfo targetValueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
                    final ObjectInspector sourceKeyOI = mapObjectInspector.getMapKeyObjectInspector();
                    final ObjectInspector sourceValueOI = mapObjectInspector.getMapValueObjectInspector();
                    final Map<?, ?> map = mapObjectInspector.getMap(object);
                    final int size = map.size();
                    // Same layout as the LIST case: the row owns the child range
                    // [offsets[batchIndex], offsets[batchIndex] + lengths[batchIndex]) of the
                    // keys/values vectors. Writing every entry at batchIndex would overwrite
                    // earlier entries and leave offsets/lengths unset.
                    mapColumnVector.offsets[batchIndex] = mapColumnVector.childCount;
                    mapColumnVector.childCount += size;
                    mapColumnVector.ensureSize(mapColumnVector.childCount, true);
                    mapColumnVector.keys.ensureSize(mapColumnVector.childCount, true);
                    mapColumnVector.values.ensureSize(mapColumnVector.childCount, true);
                    mapColumnVector.lengths[batchIndex] = size;
                    int offset = (int) mapColumnVector.offsets[batchIndex];
                    for (Map.Entry<?, ?> entry : map.entrySet()) {
                        assignConvertRowColumn(mapColumnVector.keys, offset, targetKeyTypeInfo, sourceKeyOI, null, entry.getKey());
                        assignConvertRowColumn(mapColumnVector.values, offset, targetValueTypeInfo, sourceValueOI, null, entry.getValue());
                        offset++;
                    }
                }
                break;
            case STRUCT:
                {
                    final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
                    final StructObjectInspector sourceStructOI = (StructObjectInspector) sourceObjectInspector;
                    final List<? extends StructField> sourceFields = sourceStructOI.getAllStructFieldRefs();
                    final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
                    final List<TypeInfo> targetTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
                    final int size = targetTypeInfos.size();
                    for (int i = 0; i < size; i++) {
                        if (i < sourceFields.size()) {
                            final StructField sourceStructField = sourceFields.get(i);
                            final ObjectInspector sourceFieldOI = sourceStructField.getFieldObjectInspector();
                            final Object sourceData = sourceStructOI.getStructFieldData(object, sourceStructField);
                            assignConvertRowColumn(structColumnVector.fields[i], batchIndex, targetTypeInfos.get(i), sourceFieldOI, null, sourceData);
                        } else {
                            // The target has more fields than the source: the extras become null.
                            final ColumnVector fieldColumnVector = structColumnVector.fields[i];
                            VectorizedBatchUtil.setNullColIsNullValue(fieldColumnVector, batchIndex);
                        }
                    }
                }
                break;
            case UNION:
                {
                    final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
                    final UnionObjectInspector unionObjectInspector = (UnionObjectInspector) sourceObjectInspector;
                    final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
                    final int tag = unionObjectInspector.getTag(object);
                    // Record which branch holds the value so a reader can pick the right field vector.
                    unionColumnVector.tags[batchIndex] = tag;
                    // getField takes the union object itself, not the tag.
                    assignConvertRowColumn(unionColumnVector.fields[tag], batchIndex, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), unionObjectInspector.getObjectInspectors().get(tag), null, unionObjectInspector.getField(object));
                }
                break;
            default:
                throw new RuntimeException("Category " + targetCategory.name() + " not supported");
        }
    } catch (NumberFormatException e) {
        // Some of the conversion methods throw this exception on numeric parsing errors.
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
    }
    // We always set the null flag to false when there is a value.
    columnVector.isNull[batchIndex] = false;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) List(java.util.List) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 
VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Date(java.sql.Date) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 38 with PrimitiveCategory

use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

The allocatePrimitiveField method of the VectorDeserializeRow class.

/**
 * Builds the {@code Field} descriptor for a primitive source type.
 *
 * <p>CHAR and VARCHAR types pass their declared length as the maximum length;
 * every other primitive category passes 0.
 *
 * @param sourceTypeInfo type of the source column; cast to {@code PrimitiveTypeInfo}
 * @param dataTypePhysicalVariation physical variation handed through to the {@code Field}
 * @return the newly created field descriptor
 */
private Field allocatePrimitiveField(TypeInfo sourceTypeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) {
    final PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) sourceTypeInfo;
    final PrimitiveCategory category = primitiveInfo.getPrimitiveCategory();
    switch (category) {
        case CHAR:
            return new Field(category, dataTypePhysicalVariation, ((CharTypeInfo) primitiveInfo).getLength());
        case VARCHAR:
            return new Field(category, dataTypePhysicalVariation, ((VarcharTypeInfo) primitiveInfo).getLength());
        default:
            // No length limit applies to the remaining categories.
            return new Field(category, dataTypePhysicalVariation, 0);
    }
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 39 with PrimitiveCategory

use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

The extractRowColumn method of the VectorExtractRow class.

public Object extractRowColumn(ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {
    if (colVector == null) {
        // may ask for them..
        return null;
    }
    final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
    if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
        return null;
    }
    final Category category = typeInfo.getCategory();
    switch(category) {
        case PRIMITIVE:
            {
                final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
                final Writable primitiveWritable = VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
                switch(primitiveCategory) {
                    case VOID:
                        return null;
                    case BOOLEAN:
                        ((BooleanWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex] == 0 ? false : true);
                        return primitiveWritable;
                    case BYTE:
                        ((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case SHORT:
                        ((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INT:
                        ((IntWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case LONG:
                        ((LongWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case TIMESTAMP:
                        ((TimestampWritable) primitiveWritable).set(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex));
                        return primitiveWritable;
                    case DATE:
                        ((DateWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case FLOAT:
                        ((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case DOUBLE:
                        ((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case BINARY:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
                            bytesWritable.set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case STRING:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                            ((Text) primitiveWritable).set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case VARCHAR:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            final int adjustedLength = StringExpr.truncate(bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                            final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
                            hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
                            return primitiveWritable;
                        }
                    case CHAR:
                        {
                            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
                            final byte[] bytes = bytesColVector.vector[adjustedIndex];
                            final int start = bytesColVector.start[adjustedIndex];
                            final int length = bytesColVector.length[adjustedIndex];
                            if (bytesColVector.isRepeating) {
                                if (!bytesColVector.isNull[0] && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            } else {
                                if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
                                    nullBytesReadError(primitiveCategory, batchIndex);
                                }
                            }
                            final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, ((CharTypeInfo) primitiveTypeInfo).getLength());
                            final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
                            hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), ((CharTypeInfo) primitiveTypeInfo).getLength());
                            return primitiveWritable;
                        }
                    case DECIMAL:
                        if (colVector instanceof Decimal64ColumnVector) {
                            Decimal64ColumnVector dec32ColVector = (Decimal64ColumnVector) colVector;
                            ((HiveDecimalWritable) primitiveWritable).deserialize64(dec32ColVector.vector[adjustedIndex], dec32ColVector.scale);
                        } else {
                            // The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
                            ((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) colVector).vector[adjustedIndex]);
                        }
                        return primitiveWritable;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex));
                        return primitiveWritable;
                    default:
                        throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
                }
            }
        case LIST:
            {
                final ListColumnVector listColumnVector = (ListColumnVector) colVector;
                final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
                final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
                final int offset = (int) listColumnVector.offsets[adjustedIndex];
                final int size = (int) listColumnVector.lengths[adjustedIndex];
                final List list = new ArrayList();
                for (int i = 0; i < size; i++) {
                    list.add(extractRowColumn(listColumnVector.child, listTypeInfo.getListElementTypeInfo(), listObjectInspector.getListElementObjectInspector(), offset + i));
                }
                return list;
            }
        case MAP:
            {
                final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
                final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
                final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
                final int offset = (int) mapColumnVector.offsets[adjustedIndex];
                final int size = (int) mapColumnVector.lengths[adjustedIndex];
                final Map map = new HashMap();
                for (int i = 0; i < size; i++) {
                    final Object key = extractRowColumn(mapColumnVector.keys, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), offset + i);
                    final Object value = extractRowColumn(mapColumnVector.values, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), offset + i);
                    map.put(key, value);
                }
                return map;
            }
        case STRUCT:
            {
                final StructColumnVector structColumnVector = (StructColumnVector) colVector;
                final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                final StandardStructObjectInspector structInspector = (StandardStructObjectInspector) objectInspector;
                final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                final int size = fieldTypeInfos.size();
                final List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();
                final Object struct = structInspector.create();
                for (int i = 0; i < size; i++) {
                    final StructField structField = structFields.get(i);
                    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
                    final Object value = extractRowColumn(structColumnVector.fields[i], fieldTypeInfo, structField.getFieldObjectInspector(), adjustedIndex);
                    structInspector.setStructFieldData(struct, structField, value);
                }
                return struct;
            }
        case UNION:
            {
                final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
                final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
                final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
                final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
                final Object object = extractRowColumn(unionColumnVector.fields[tag], objectTypeInfos.get(tag), unionInspectors.get(tag), adjustedIndex);
                final StandardUnion standardUnion = new StandardUnion();
                standardUnion.setTag(tag);
                standardUnion.setObject(object);
                return standardUnion;
            }
        default:
            throw new RuntimeException("Category " + category.name() + " not supported");
    }
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) ArrayList(java.util.ArrayList) List(java.util.List) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) 
StandardUnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StandardUnion(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) HashMap(java.util.HashMap) Map(java.util.Map) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 40 with PrimitiveCategory

use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

the class VectorizationContext method getStructInExpression.

/**
 * Builds the vector expression for an IN test of a struct column against a list of struct values.
 *
 * <p>Every entry of {@code inChildren} is turned into an array of per-field constants, which is
 * serialized field by field with {@code BinarySortableSerializeWrite}. The resulting byte arrays
 * are handed to the created expression as its IN-list values, together with a scratch output
 * column allocated with the string type info.
 *
 * <p>Only the string, int and float constant families are serialized here; any other
 * {@code InConstantType} is rejected.
 *
 * @param childExpr not referenced by this method
 * @param colExpr expression whose children become the struct column expressions
 * @param colTypeInfo type of the struct column; cast to {@code StructTypeInfo}
 * @param inChildren the IN-list entries; each is either an {@code ExprNodeConstantDesc} or is
 *        cast to {@code ExprNodeGenericFuncDesc} and evaluated
 * @param mode selects between {@code FilterStructColumnInList} and {@code StructColumnInList}
 * @param returnType return type passed through to {@code createVectorExpression}
 * @return the configured expression, or {@code null} when any struct field is not primitive
 * @throws HiveException wrapping any exception raised while evaluating or serializing the
 *         IN-list entries (including the "unexpected constant" errors raised below)
 */
private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    final ArrayList<TypeInfo> structFieldTypes = ((StructTypeInfo) colTypeInfo).getAllStructFieldTypeInfos();
    final int numFields = structFieldTypes.size();
    final ColumnVector.Type[] columnVectorTypes = new ColumnVector.Type[numFields];
    final InConstantType[] constantKinds = new InConstantType[numFields];
    for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) {
        final TypeInfo fieldType = structFieldTypes.get(fieldIdx);
        // Only primitive fields are supported for now.
        if (fieldType.getCategory() != Category.PRIMITIVE) {
            return null;
        }
        // Fields are serialized using the 4 basic column vector types.
        columnVectorTypes[fieldIdx] = getColumnVectorTypeFromTypeInfo(fieldType);
        // The IN (..) constants are currently evaluated in special ways, keyed by this kind.
        constantKinds[fieldIdx] = getInConstantTypeFromPrimitiveCategory(((PrimitiveTypeInfo) fieldType).getPrimitiveCategory());
    }

    final Output serializeBuffer = new Output();
    final BinarySortableSerializeWrite serializer = new BinarySortableSerializeWrite(numFields);
    final int numInEntries = inChildren.size();
    final byte[][] serializedEntries = new byte[numInEntries][];
    try {
        for (int entryIdx = 0; entryIdx < numInEntries; entryIdx++) {
            final ExprNodeDesc entry = inChildren.get(entryIdx);

            // Obtain the per-field constant values of this IN-list entry.
            final Object[] fieldConstants;
            if (entry instanceof ExprNodeConstantDesc) {
                final ConstantObjectInspector constantInspector = ((ExprNodeConstantDesc) entry).getWritableObjectInspector();
                fieldConstants = ((List<?>) constantInspector.getWritableConstantValue()).toArray();
            } else {
                final ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) entry;
                final ExprNodeEvaluator<?> funcEvaluator = ExprNodeEvaluatorFactory.get(funcDesc);
                // The evaluator is initialized before use; the inspector it returns is not needed.
                funcEvaluator.initialize(funcDesc.getWritableObjectInspector());
                fieldConstants = (Object[]) funcEvaluator.evaluate(null);
            }

            // Point the serializer at the shared buffer before writing this entry's fields.
            serializer.set(serializeBuffer);
            for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) {
                final Object fieldConstant = fieldConstants[fieldIdx];
                if (fieldConstant == null) {
                    serializer.writeNull();
                    continue;
                }
                final InConstantType constantKind = constantKinds[fieldIdx];
                switch(constantKind) {
                    case STRING_FAMILY:
                        {
                            if (!(fieldConstant instanceof Text)) {
                                throw new HiveException("Unexpected constant String type " + fieldConstant.getClass().getSimpleName());
                            }
                            final Text textConstant = (Text) fieldConstant;
                            // Write only the first getLength() bytes of the backing array.
                            final byte[] textBytes = textConstant.getBytes();
                            serializer.writeString(textBytes, 0, textConstant.getLength());
                        }
                        break;
                    case INT_FAMILY:
                        {
                            final long longConstant;
                            if (fieldConstant instanceof LongWritable) {
                                longConstant = ((LongWritable) fieldConstant).get();
                            } else if (fieldConstant instanceof IntWritable) {
                                longConstant = ((IntWritable) fieldConstant).get();
                            } else {
                                throw new HiveException("Unexpected constant Long type " + fieldConstant.getClass().getSimpleName());
                            }
                            serializer.writeLong(longConstant);
                        }
                        break;
                    case FLOAT_FAMILY:
                        {
                            if (!(fieldConstant instanceof DoubleWritable)) {
                                throw new HiveException("Unexpected constant Double type " + fieldConstant.getClass().getSimpleName());
                            }
                            serializer.writeDouble(((DoubleWritable) fieldConstant).get());
                        }
                        break;
                    // UNDONE: DATE, TIMESTAMP and DECIMAL are not serialized yet and are rejected
                    // together with any other constant kind.
                    default:
                        throw new RuntimeException("Unexpected IN constant type " + constantKind.name());
                }
            }
            serializedEntries[entryIdx] = Arrays.copyOfRange(serializeBuffer.getData(), 0, serializeBuffer.getLength());
        }
    } catch (Exception e) {
        // Everything raised above, HiveException included, is wrapped in a new HiveException.
        throw new HiveException(e);
    }

    // Create a single child representing the scratch column where we will
    // generate the serialized keys of the batch.
    final int scratchBytesColumn = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
    final Class<?> inListClass;
    if (mode == VectorExpressionDescriptor.Mode.FILTER) {
        inListClass = FilterStructColumnInList.class;
    } else {
        inListClass = StructColumnInList.class;
    }
    // NOTE(review): PROJECTION is passed here regardless of the requested mode; only the class
    // differs between filter and projection. Confirm this is intended.
    final VectorExpression structInExpr = createVectorExpression(inListClass, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
    ((IStringInExpr) structInExpr).setInListValues(serializedEntries);
    ((IStructInExpr) structInExpr).setScratchBytesColumn(scratchBytesColumn);
    ((IStructInExpr) structInExpr).setStructColumnExprs(this, colExpr.getChildren(), columnVectorTypes);
    return structInExpr;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) IntWritable(org.apache.hadoop.io.IntWritable) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InputExpressionType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)

Aggregations

PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)84 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)45 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)26 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)17 ArrayList (java.util.ArrayList)15 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)15 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)14 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)12 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)12 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)11 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)11 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)11 Test (org.junit.Test)11 UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)10 Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)10 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)9 ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)9 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)8 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)8 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)8