Example 36 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

The class ColumnMappings, method parseColumnStorageTypes.

/**
 * Utility method for parsing a string of the form '-,b,s,-,s:b,...' as a means of specifying
 * whether to use a binary or a UTF string format to serialize and deserialize primitive
 * data types like boolean, byte, short, int, long, float, and double. This applies to
 * regular columns and also to map column types which are associated with an HBase column
 * family. For the map types, we apply the specification to the key or the value provided it
 * is one of the above primitive types. The specifier is a colon-separated value of the form
 * '-:s' or 'b:b', where either side of the colon is 's', 'b', or '-'. 's' selects string
 * format storage, 'b' selects native fixed-width byte-oriented storage, and '-' uses the
 * table-level default.
 *
 * @param hbaseTableDefaultStorageType - the specification associated with the table property
 *        hbase.table.default.storage.type
 * @throws SerDeException on parse error.
 */
void parseColumnStorageTypes(String hbaseTableDefaultStorageType) throws SerDeException {
    boolean tableBinaryStorage = false;
    if (hbaseTableDefaultStorageType != null && !"".equals(hbaseTableDefaultStorageType)) {
        if (hbaseTableDefaultStorageType.equals("binary")) {
            tableBinaryStorage = true;
        } else if (!hbaseTableDefaultStorageType.equals("string")) {
            throw new SerDeException("Error: " + HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE + " parameter must be specified as" + " 'string' or 'binary'; '" + hbaseTableDefaultStorageType + "' is not a valid specification for this table/serde property.");
        }
    }
    // byte, short, int, long, float, and double have a binary byte oriented storage option
    for (ColumnMapping colMap : columnsMapping) {
        TypeInfo colType = colMap.columnType;
        String mappingSpec = colMap.mappingSpec;
        String[] mapInfo = mappingSpec.split("#");
        String[] storageInfo = null;
        if (mapInfo.length == 2) {
            storageInfo = mapInfo[1].split(":");
        }
        if (storageInfo == null) {
            // use the table default storage specification
            if (colType.getCategory() == ObjectInspector.Category.PRIMITIVE) {
                if (!colType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
                    colMap.binaryStorage.add(tableBinaryStorage);
                } else {
                    colMap.binaryStorage.add(false);
                }
            } else if (colType.getCategory() == ObjectInspector.Category.MAP) {
                TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
                TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();
                if (keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE && !keyTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
                    colMap.binaryStorage.add(tableBinaryStorage);
                } else {
                    colMap.binaryStorage.add(false);
                }
                if (valueTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE && !valueTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
                    colMap.binaryStorage.add(tableBinaryStorage);
                } else {
                    colMap.binaryStorage.add(false);
                }
            } else {
                colMap.binaryStorage.add(false);
            }
        } else if (storageInfo.length == 1) {
            // we have a storage specification for a primitive column type
            String storageOption = storageInfo[0];
            if ((colType.getCategory() == ObjectInspector.Category.MAP) || !(storageOption.equals("-") || "string".startsWith(storageOption) || "binary".startsWith(storageOption))) {
                throw new SerDeException("Error: A column storage specification is one of the following:" + " '-', a prefix of 'string', or a prefix of 'binary'. " + storageOption + " is not a valid storage option specification for " + colMap.columnName);
            }
            if (colType.getCategory() == ObjectInspector.Category.PRIMITIVE && !colType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
                if ("-".equals(storageOption)) {
                    colMap.binaryStorage.add(tableBinaryStorage);
                } else if ("binary".startsWith(storageOption)) {
                    colMap.binaryStorage.add(true);
                } else {
                    colMap.binaryStorage.add(false);
                }
            } else {
                colMap.binaryStorage.add(false);
            }
        } else if (storageInfo.length == 2) {
            // we have a storage specification for a map column type
            String keyStorage = storageInfo[0];
            String valStorage = storageInfo[1];
            if ((colType.getCategory() != ObjectInspector.Category.MAP) || !(keyStorage.equals("-") || "string".startsWith(keyStorage) || "binary".startsWith(keyStorage)) || !(valStorage.equals("-") || "string".startsWith(valStorage) || "binary".startsWith(valStorage))) {
                throw new SerDeException("Error: To specify a valid column storage type for a Map" + " column, use any two specifiers from '-', a prefix of 'string', " + " and a prefix of 'binary' separated by a ':'." + " Valid examples are '-:-', 's:b', etc. They specify the storage type for the" + " key and value parts of the Map<?,?> respectively." + " Invalid storage specification for column " + colMap.columnName + "; " + storageInfo[0] + ":" + storageInfo[1]);
            }
            TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
            TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();
            if (keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE && !keyTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
                if (keyStorage.equals("-")) {
                    colMap.binaryStorage.add(tableBinaryStorage);
                } else if ("binary".startsWith(keyStorage)) {
                    colMap.binaryStorage.add(true);
                } else {
                    colMap.binaryStorage.add(false);
                }
            } else {
                colMap.binaryStorage.add(false);
            }
            if (valueTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE && !valueTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
                if (valStorage.equals("-")) {
                    colMap.binaryStorage.add(tableBinaryStorage);
                } else if ("binary".startsWith(valStorage)) {
                    colMap.binaryStorage.add(true);
                } else {
                    colMap.binaryStorage.add(false);
                }
            } else {
                colMap.binaryStorage.add(false);
            }
            if (colMap.binaryStorage.size() != 2) {
                throw new SerDeException("Error: In parsing the storage specification for column " + colMap.columnName);
            }
        } else {
            // error in storage specification
            throw new SerDeException("Error: " + HBaseSerDe.HBASE_COLUMNS_MAPPING + " storage specification " + mappingSpec + " is not valid for column: " + colMap.columnName);
        }
    }
}
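
The specifier grammar documented above is easiest to see in isolation. Below is a minimal, self-contained sketch (the class and the resolveStorage helper are hypothetical, not part of Hive) of how a single specifier token resolves against the table-level default, mirroring the '-' / 'string'-prefix / 'binary'-prefix rules the method enforces:

public class StorageSpecSketch {

    /** Resolves one specifier token: true = binary storage, false = string storage. */
    static boolean resolveStorage(String spec, boolean tableDefaultBinary) {
        if ("-".equals(spec)) {
            return tableDefaultBinary;                          // '-' falls back to the table default
        } else if (!spec.isEmpty() && "binary".startsWith(spec)) {
            return true;                                        // 'b', 'bi', ..., 'binary'
        } else if (!spec.isEmpty() && "string".startsWith(spec)) {
            return false;                                       // 's', 'st', ..., 'string'
        }
        throw new IllegalArgumentException("Invalid storage specifier: " + spec);
    }

    public static void main(String[] args) {
        System.out.println(resolveStorage("b", false)); // 'cf:col#b'  -> true (binary)
        System.out.println(resolveStorage("-", true));  // 'cf:col#-'  -> true (table default)
        System.out.println(resolveStorage("s", true));  // key of 'cf:#s:b'   -> false (string)
        System.out.println(resolveStorage("b", true));  // value of 'cf:#s:b' -> true (binary)
    }
}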

Example 37 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

The class ColumnMappings, method setHiveColumnDescription.

void setHiveColumnDescription(String serdeName, List<String> columnNames, List<TypeInfo> columnTypes) throws SerDeException {
    if (columnsMapping.length != columnNames.size()) {
        throw new SerDeException(serdeName + ": columns has " + columnNames.size() + " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" + " (counting the key if implicit)");
    }
    // the family must be mapped to a Hive Map whose key extends LazyPrimitive<?, ?> and thus has type Category.PRIMITIVE
    for (int i = 0; i < columnNames.size(); i++) {
        ColumnMapping colMap = columnsMapping[i];
        colMap.columnName = columnNames.get(i);
        colMap.columnType = columnTypes.get(i);
        if (colMap.qualifierName == null && !colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
            TypeInfo typeInfo = columnTypes.get(i);
            if ((typeInfo.getCategory() != ObjectInspector.Category.MAP) || (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory() != ObjectInspector.Category.PRIMITIVE)) {
                throw new SerDeException(serdeName + ": hbase column family '" + colMap.familyName + "' should be mapped to Map<? extends LazyPrimitive<?, ?>,?>, that is " + "the Key for the map should be of primitive type, but is mapped to " + typeInfo.getTypeName());
            }
        }
        if (colMap.hbaseTimestamp) {
            TypeInfo typeInfo = columnTypes.get(i);
            if (!colMap.isCategory(PrimitiveCategory.TIMESTAMP) && !colMap.isCategory(PrimitiveCategory.LONG)) {
                throw new SerDeException(serdeName + ": timestamp columns should be of " + "timestamp or bigint type, but is mapped to " + typeInfo.getTypeName());
            }
        }
    }
}
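
As a quick illustration of the constraint this method enforces, the sketch below builds two map TypeInfos with Hive's TypeInfoFactory and applies the same category check: a whole-family mapping is accepted only when the map key is PRIMITIVE. The wrapper class and main method are illustrative only:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FamilyMappingCheckSketch {
    public static void main(String[] args) {
        // map<string,int>: a valid type for a whole-family mapping.
        TypeInfo valid = TypeInfoFactory.getMapTypeInfo(
            TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo);
        // map<array<string>,int>: invalid, because the map key is not primitive.
        TypeInfo invalid = TypeInfoFactory.getMapTypeInfo(
            TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo),
            TypeInfoFactory.intTypeInfo);
        for (TypeInfo ti : new TypeInfo[] { valid, invalid }) {
            boolean ok = ti.getCategory() == ObjectInspector.Category.MAP
                && ((MapTypeInfo) ti).getMapKeyTypeInfo().getCategory()
                    == ObjectInspector.Category.PRIMITIVE;
            System.out.println(ti.getTypeName() + " -> " + (ok ? "accepted" : "rejected"));
        }
    }
}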

Example 38 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

The class VectorAssignRow, method assignRowColumn.

private void assignRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, Object object) {
    if (object == null) {
        assignNullRowColumn(columnVector, batchIndex, targetTypeInfo);
        return;
    }
    switch(targetTypeInfo.getCategory()) {
        case PRIMITIVE:
            {
                final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
                switch(targetPrimitiveCategory) {
                    case VOID:
                        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                        return;
                    case BOOLEAN:
                        if (object instanceof Boolean) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = (((Boolean) object) ? 1 : 0);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = (((BooleanWritable) object).get() ? 1 : 0);
                        }
                        break;
                    case BYTE:
                        if (object instanceof Byte) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Byte) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((ByteWritable) object).get();
                        }
                        break;
                    case SHORT:
                        if (object instanceof Short) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Short) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((ShortWritable) object).get();
                        }
                        break;
                    case INT:
                        if (object instanceof Integer) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Integer) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((IntWritable) object).get();
                        }
                        break;
                    case LONG:
                        if (object instanceof Long) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Long) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((LongWritable) object).get();
                        }
                        break;
                    case TIMESTAMP:
                        if (object instanceof Timestamp) {
                            ((TimestampColumnVector) columnVector).set(batchIndex, ((Timestamp) object).toSqlTimestamp());
                        } else {
                            ((TimestampColumnVector) columnVector).set(batchIndex, ((TimestampWritableV2) object).getTimestamp().toSqlTimestamp());
                        }
                        break;
                    case DATE:
                        if (object instanceof Date) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = DateWritableV2.dateToDays((Date) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((DateWritableV2) object).getDays();
                        }
                        break;
                    case FLOAT:
                        if (object instanceof Float) {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Float) object);
                        } else {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((FloatWritable) object).get();
                        }
                        break;
                    case DOUBLE:
                        if (object instanceof Double) {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Double) object);
                        } else {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((DoubleWritable) object).get();
                        }
                        break;
                    case BINARY:
                        {
                            if (object instanceof byte[]) {
                                byte[] bytes = (byte[]) object;
                                ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                            } else {
                                BytesWritable bw = (BytesWritable) object;
                                ((BytesColumnVector) columnVector).setVal(batchIndex, bw.getBytes(), 0, bw.getLength());
                            }
                        }
                        break;
                    case STRING:
                        {
                            if (object instanceof String) {
                                String string = (String) object;
                                byte[] bytes = string.getBytes();
                                ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                            } else {
                                Text tw = (Text) object;
                                ((BytesColumnVector) columnVector).setVal(batchIndex, tw.getBytes(), 0, tw.getLength());
                            }
                        }
                        break;
                    case VARCHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            // We store VARCHAR type stripped of pads.
                            HiveVarchar hiveVarchar;
                            if (object instanceof HiveVarchar) {
                                hiveVarchar = (HiveVarchar) object;
                            } else {
                                hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar();
                            }
                            // TODO: HIVE-13624 Do we need maxLength checking?
                            byte[] bytes = hiveVarchar.getValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case CHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            // We store CHAR type stripped of pads.
                            HiveChar hiveChar;
                            if (object instanceof HiveChar) {
                                hiveChar = (HiveChar) object;
                            } else {
                                hiveChar = ((HiveCharWritable) object).getHiveChar();
                            }
                            // TODO: HIVE-13624 Do we need maxLength checking?
                            // We store CHAR in vector row batch with padding stripped.
                            byte[] bytes = hiveChar.getStrippedValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case DECIMAL:
                        if (columnVector instanceof DecimalColumnVector) {
                            if (object instanceof HiveDecimal) {
                                ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimal) object);
                            } else {
                                ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimalWritable) object);
                            }
                        } else {
                            if (object instanceof HiveDecimal) {
                                ((Decimal64ColumnVector) columnVector).set(batchIndex, (HiveDecimal) object);
                            } else {
                                ((Decimal64ColumnVector) columnVector).set(batchIndex, (HiveDecimalWritable) object);
                            }
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        if (object instanceof HiveIntervalYearMonth) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonth) object).getTotalMonths();
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths();
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        if (object instanceof HiveIntervalDayTime) {
                            ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, (HiveIntervalDayTime) object);
                        } else {
                            ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime());
                        }
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
                }
            }
            break;
        case LIST:
            {
                final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
                final ListTypeInfo listTypeInfo = (ListTypeInfo) targetTypeInfo;
                final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
                final List list = (List) object;
                final int size = list.size();
                final int childCount = listColumnVector.childCount;
                listColumnVector.offsets[batchIndex] = childCount;
                listColumnVector.lengths[batchIndex] = size;
                listColumnVector.childCount = childCount + size;
                listColumnVector.child.ensureSize(childCount + size, true);
                for (int i = 0; i < size; i++) {
                    assignRowColumn(listColumnVector.child, childCount + i, elementTypeInfo, list.get(i));
                }
            }
            break;
        case MAP:
            {
                final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
                final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
                final Map<Object, Object> map = (Map<Object, Object>) object;
                final int size = map.size();
                int childCount = mapColumnVector.childCount;
                mapColumnVector.offsets[batchIndex] = childCount;
                mapColumnVector.lengths[batchIndex] = size;
                mapColumnVector.keys.ensureSize(childCount + size, true);
                mapColumnVector.values.ensureSize(childCount + size, true);
                for (Map.Entry<Object, Object> entry : map.entrySet()) {
                    assignRowColumn(mapColumnVector.keys, childCount, mapTypeInfo.getMapKeyTypeInfo(), entry.getKey());
                    assignRowColumn(mapColumnVector.values, childCount, mapTypeInfo.getMapValueTypeInfo(), entry.getValue());
                    childCount++;
                }
                mapColumnVector.childCount = childCount;
            }
            break;
        case STRUCT:
            {
                final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
                final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
                final List<TypeInfo> targetFieldTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
                final int size = targetFieldTypeInfos.size();
                if (object instanceof List) {
                    final List struct = (List) object;
                    for (int i = 0; i < size; i++) {
                        assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), struct.get(i));
                    }
                } else {
                    final Object[] array = (Object[]) object;
                    for (int i = 0; i < size; i++) {
                        assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), array[i]);
                    }
                }
            }
            break;
        case UNION:
            {
                final StandardUnion union = (StandardUnion) object;
                final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
                final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
                final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                final byte tag = union.getTag();
                unionColumnVector.tags[batchIndex] = tag;
                assignRowColumn(unionColumnVector.fields[tag], batchIndex, objectTypeInfos.get(tag), union.getObject());
            }
            break;
        default:
            throw new RuntimeException("Category " + targetTypeInfo.getCategory().name() + " not supported");
    }
    /*
     * We always set the null flag to false when there is a value.
     */
    columnVector.isNull[batchIndex] = false;
}
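
For the common primitive path, the effect of assignRowColumn can be reproduced by hand. A minimal sketch, assuming Hive's vectorization classes (org.apache.hadoop.hive.ql.exec.vector) are on the classpath; the wrapper class is illustrative only:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class AssignLongSketch {
    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(1); // one bigint column
        batch.cols[0] = new LongColumnVector();               // default capacity of 1024 rows
        LongColumnVector col = (LongColumnVector) batch.cols[0];
        int batchIndex = 0;
        Object object = Long.valueOf(42L);
        // The LONG branch above, specialized for a boxed Long:
        col.vector[batchIndex] = (Long) object;
        // ...and, as at the very end of assignRowColumn, clear the null flag:
        col.isNull[batchIndex] = false;
        batch.size = 1;
        System.out.println(col.vector[batchIndex]); // 42
    }
}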

Example 39 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

The class VectorAssignRow, method assignConvertRowColumn.

private void assignConvertRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, ObjectInspector sourceObjectInspector, Writable convertTargetWritable, Object object) {
    final Category targetCategory = targetTypeInfo.getCategory();
    if (targetCategory == null) {
        /*
       * This is a column that we don't want (i.e. not included) -- we are done.
       */
        return;
    }
    if (object == null) {
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
    }
    try {
        switch(targetCategory) {
            case PRIMITIVE:
                final PrimitiveObjectInspector sourcePrimitiveOI = (PrimitiveObjectInspector) sourceObjectInspector;
                final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
                switch(targetPrimitiveCategory) {
                    case VOID:
                        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                        return;
                    case BOOLEAN:
                        ((LongColumnVector) columnVector).vector[batchIndex] = (PrimitiveObjectInspectorUtils.getBoolean(object, sourcePrimitiveOI) ? 1 : 0);
                        break;
                    case BYTE:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getByte(object, sourcePrimitiveOI);
                        break;
                    case SHORT:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getShort(object, sourcePrimitiveOI);
                        break;
                    case INT:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getInt(object, sourcePrimitiveOI);
                        break;
                    case LONG:
                        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getLong(object, sourcePrimitiveOI);
                        break;
                    case TIMESTAMP:
                        {
                            final Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestamp(object, sourcePrimitiveOI);
                            if (timestamp == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((TimestampColumnVector) columnVector).set(batchIndex, timestamp.toSqlTimestamp());
                        }
                        break;
                    case DATE:
                        {
                            final Date date = PrimitiveObjectInspectorUtils.getDate(object, sourcePrimitiveOI);
                            if (date == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            DateWritableV2 dateWritable = (DateWritableV2) convertTargetWritable;
                            if (dateWritable == null) {
                                dateWritable = new DateWritableV2();
                            }
                            dateWritable.set(date);
                            ((LongColumnVector) columnVector).vector[batchIndex] = dateWritable.getDays();
                        }
                        break;
                    case FLOAT:
                        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getFloat(object, sourcePrimitiveOI);
                        break;
                    case DOUBLE:
                        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getDouble(object, sourcePrimitiveOI);
                        break;
                    case BINARY:
                        {
                            final BytesWritable bytesWritable = PrimitiveObjectInspectorUtils.getBinary(object, sourcePrimitiveOI);
                            if (bytesWritable == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength());
                        }
                        break;
                    case STRING:
                        {
                            final String string = PrimitiveObjectInspectorUtils.getString(object, sourcePrimitiveOI);
                            if (string == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            Text text = (Text) convertTargetWritable;
                            if (text == null) {
                                text = new Text();
                            }
                            text.set(string);
                            ((BytesColumnVector) columnVector).setVal(batchIndex, text.getBytes(), 0, text.getLength());
                        }
                        break;
                    case VARCHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            final HiveVarchar hiveVarchar = PrimitiveObjectInspectorUtils.getHiveVarchar(object, sourcePrimitiveOI);
                            if (hiveVarchar == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // TODO: Do we need maxLength checking?
                            byte[] bytes = hiveVarchar.getValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case CHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            final HiveChar hiveChar = PrimitiveObjectInspectorUtils.getHiveChar(object, sourcePrimitiveOI);
                            if (hiveChar == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            // We store CHAR in vector row batch with padding stripped.
                            // TODO: Do we need maxLength checking?
                            final byte[] bytes = hiveChar.getStrippedValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case DECIMAL:
                        {
                            final HiveDecimal hiveDecimal = PrimitiveObjectInspectorUtils.getHiveDecimal(object, sourcePrimitiveOI);
                            if (hiveDecimal == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            if (columnVector instanceof Decimal64ColumnVector) {
                                Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) columnVector;
                                dec64ColVector.set(batchIndex, hiveDecimal);
                                if (dec64ColVector.isNull[batchIndex]) {
                                    return;
                                }
                            } else {
                                ((DecimalColumnVector) columnVector).set(batchIndex, hiveDecimal);
                            }
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            final HiveIntervalYearMonth intervalYearMonth = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(object, sourcePrimitiveOI);
                            if (intervalYearMonth == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((LongColumnVector) columnVector).vector[batchIndex] = intervalYearMonth.getTotalMonths();
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            final HiveIntervalDayTime intervalDayTime = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(object, sourcePrimitiveOI);
                            if (intervalDayTime == null) {
                                VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                                return;
                            }
                            ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, intervalDayTime);
                        }
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
                }
                break;
            case LIST:
                {
                    final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
                    final ListObjectInspector sourceListOI = (ListObjectInspector) sourceObjectInspector;
                    final ObjectInspector sourceElementOI = sourceListOI.getListElementObjectInspector();
                    final int size = sourceListOI.getListLength(object);
                    final TypeInfo targetElementTypeInfo = ((ListTypeInfo) targetTypeInfo).getListElementTypeInfo();
                    listColumnVector.offsets[batchIndex] = listColumnVector.childCount;
                    listColumnVector.childCount += size;
                    listColumnVector.ensureSize(listColumnVector.childCount, true);
                    listColumnVector.lengths[batchIndex] = size;
                    for (int i = 0; i < size; i++) {
                        final Object element = sourceListOI.getListElement(object, i);
                        final int offset = (int) (listColumnVector.offsets[batchIndex] + i);
                        assignConvertRowColumn(listColumnVector.child, offset, targetElementTypeInfo, sourceElementOI, null, element);
                    }
                }
                break;
            case MAP:
                {
                    final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
                    final MapObjectInspector mapObjectInspector = (MapObjectInspector) sourceObjectInspector;
                    final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
                    final Map<?, ?> map = mapObjectInspector.getMap(object);
                    for (Map.Entry<?, ?> entry : map.entrySet()) {
                        assignConvertRowColumn(mapColumnVector.keys, batchIndex, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), null, entry.getKey());
                        assignConvertRowColumn(mapColumnVector.values, batchIndex, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), null, entry.getValue());
                    }
                }
                break;
            case STRUCT:
                {
                    final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
                    final StructObjectInspector sourceStructOI = (StructObjectInspector) sourceObjectInspector;
                    final List<? extends StructField> sourceFields = sourceStructOI.getAllStructFieldRefs();
                    final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
                    final List<TypeInfo> targetTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
                    final int size = targetTypeInfos.size();
                    for (int i = 0; i < size; i++) {
                        if (i < sourceFields.size()) {
                            final StructField sourceStructField = sourceFields.get(i);
                            final ObjectInspector sourceFieldOI = sourceStructField.getFieldObjectInspector();
                            final Object sourceData = sourceStructOI.getStructFieldData(object, sourceStructField);
                            assignConvertRowColumn(structColumnVector.fields[i], batchIndex, targetTypeInfos.get(i), sourceFieldOI, null, sourceData);
                        } else {
                            final ColumnVector fieldColumnVector = structColumnVector.fields[i];
                            VectorizedBatchUtil.setNullColIsNullValue(fieldColumnVector, batchIndex);
                        }
                    }
                }
                break;
            case UNION:
                {
                    final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
                    final UnionObjectInspector unionObjectInspector = (UnionObjectInspector) sourceObjectInspector;
                    final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
                    final int tag = unionObjectInspector.getTag(object);
                    assignConvertRowColumn(unionColumnVector.fields[tag], batchIndex, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), unionObjectInspector.getObjectInspectors().get(tag), null, unionObjectInspector.getField(tag));
                }
                break;
            default:
                throw new RuntimeException("Category " + targetCategory.name() + " not supported");
        }
    } catch (NumberFormatException e) {
        // Some of the conversion methods throw this exception on numeric parsing errors.
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
    }
    // We always set the null flag to false when there is a value.
    columnVector.isNull[batchIndex] = false;
}
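
The primitive branches above delegate the actual coercion to PrimitiveObjectInspectorUtils, which is also where the NumberFormatException caught at the bottom originates. A minimal sketch of that conversion path for the INT case; the wrapper class is illustrative only:

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

public class ConvertIntSketch {
    public static void main(String[] args) {
        PrimitiveObjectInspector stringOI =
            PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        // The INT branch above delegates the coercion to getInt:
        System.out.println(PrimitiveObjectInspectorUtils.getInt("42", stringOI)); // 42
        try {
            PrimitiveObjectInspectorUtils.getInt("not-a-number", stringOI);
        } catch (NumberFormatException e) {
            // assignConvertRowColumn catches exactly this and nulls the cell instead.
            System.out.println("parse error -> cell is marked null");
        }
    }
}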

Example 40 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

The class HCatRecordObjectInspectorFactory, method getStandardObjectInspectorFromTypeInfo.

public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
    ObjectInspector oi = cachedObjectInspectors.getIfPresent(typeInfo);
    if (oi == null) {
        LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName());
        switch(typeInfo.getCategory()) {
            case PRIMITIVE:
                oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) typeInfo);
                break;
            case STRUCT:
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
                List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
                for (int i = 0; i < fieldTypeInfos.size(); i++) {
                    fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
                }
                oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
                break;
            case LIST:
                ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo(((ListTypeInfo) typeInfo).getListElementTypeInfo());
                oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector);
                break;
            case MAP:
                ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
                ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
                oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector);
                break;
            default:
                oi = null;
        }
        cachedObjectInspectors.asMap().put(typeInfo, oi);
    }
    return oi;
}
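
A hedged usage sketch: parse a Hive type string with TypeInfoUtils, then request an inspector from the factory. It assumes HCatalog's org.apache.hive.hcatalog.data package is on the classpath (older releases used org.apache.hcatalog.data); the wrapper class is illustrative only:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hive.hcatalog.data.HCatRecordObjectInspectorFactory;

public class OiLookupSketch {
    public static void main(String[] args) {
        // Parse a Hive type string, then ask the factory for a (cached) inspector.
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>");
        ObjectInspector oi =
            HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(typeInfo);
        System.out.println(oi.getCategory()); // MAP
        // A second call with the same TypeInfo returns the identical cached instance.
    }
}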
