Search in sources :

Example 56 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class JsonSerDe method initialize.

/**
 * Initializes this SerDe from the table properties.
 *
 * <p>Reads the column names ({@code serdeConstants.LIST_COLUMNS}) and column types
 * ({@code serdeConstants.LIST_COLUMN_TYPES}) from {@code tbl}, builds the row struct type
 * from them, and then sets the object inspector, HCat schema, JSON factory and timestamp
 * parser members that are declared outside this method.
 *
 * @param conf configuration; not read by this method
 * @param tbl  table properties holding the column names, column types, the optional
 *             column-name delimiter and the timestamp formats
 * @throws SerDeException if the number of column names differs from the number of column
 *                        types, or if the HCat schema cannot be derived from the row type
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    LOG.debug("Initializing JsonSerDe: {}", tbl.entrySet());
    // Get column names and types
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    // all table column names; a missing property is treated like an empty one
    if (columnNameProperty == null || columnNameProperty.isEmpty()) {
        columnNames = Collections.emptyList();
    } else {
        // String.split takes a regex: quote the delimiter so that characters such as
        // '|' or '.' are matched literally instead of being interpreted as a pattern.
        columnNames = Arrays.asList(columnNameProperty.split(java.util.regex.Pattern.quote(columnNameDelimiter)));
    }
    // all column types; a missing property is treated like an empty one
    final List<TypeInfo> columnTypes;
    if (columnTypeProperty == null || columnTypeProperty.isEmpty()) {
        columnTypes = Collections.emptyList();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    LOG.debug("columns: {}, {}", columnNameProperty, columnNames);
    LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes);
    // Explicit check rather than an assert: asserts are disabled unless the JVM runs with -ea,
    // so a mismatch would otherwise go unnoticed until a later, harder to diagnose failure.
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Number of column names (" + columnNames.size() + ") does not match number of column types (" + columnTypes.size() + ")");
    }
    final StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
    try {
        // The schema derived from the row type wraps the row struct as its first field;
        // the struct sub-schema of that field describes the table columns.
        schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
        LOG.debug("schema : {}", schema);
        LOG.debug("fields : {}", schema.getFieldNames());
    } catch (HCatException e) {
        throw new SerDeException(e);
    }
    jsonFactory = new JsonFactory();
    tsParser = new TimestampParser(HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS)));
}
Also used : TimestampParser(org.apache.hive.common.util.TimestampParser) HCatException(org.apache.hive.hcatalog.common.HCatException) JsonFactory(org.codehaus.jackson.JsonFactory) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 57 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class VectorSerializeRow method serializeStructWrite.

/**
 * Serializes one struct value taken from a struct column vector.
 *
 * <p>The struct's row value is extracted through {@code vectorExtractRow} and handed to
 * {@code serializeWrite.beginStruct}; then each child field is written from its own column
 * vector, with a struct separator emitted between consecutive fields.
 *
 * @param colVector          struct column vector whose {@code fields} hold the child vectors
 * @param field              field descriptor providing type info, object inspector, children
 *                           and child count
 * @param adjustedBatchIndex batch index passed through to extraction and to the child writes
 * @throws IOException declared for the underlying serialize-write calls
 */
private void serializeStructWrite(StructColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
    final StructTypeInfo typeInfo = (StructTypeInfo) field.typeInfo;
    final StructObjectInspector objectInspector = (StructObjectInspector) field.objectInspector;
    final ColumnVector[] fieldColumnVectors = colVector.fields;
    final Field[] children = field.children;
    final int size = field.count;
    // Raw List is kept on purpose: it is passed straight to beginStruct as extracted.
    final List list = (List) vectorExtractRow.extractRowColumn(colVector, typeInfo, objectInspector, adjustedBatchIndex);
    serializeWrite.beginStruct(list);
    for (int i = 0; i < size; i++) {
        // Separator goes between fields, never before the first one.
        if (i > 0) {
            serializeWrite.separateStruct();
        }
        serializeWrite(fieldColumnVectors[i], children[i], adjustedBatchIndex);
    }
    serializeWrite.finishStruct();
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) List(java.util.List) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 58 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class VectorSerializeRow method createField.

/**
 * Builds the {@code Field} descriptor for a type, recursing into nested types.
 *
 * <p>Primitive types record their primitive category. All other types get a standard Java
 * object inspector and one child descriptor per nested type: one for a list element, two for
 * a map (key at index 0, value at index 1), and one per member for structs and unions.
 * {@code count} is set to the number of children for non-primitive types.
 *
 * @param typeInfo the type to describe
 * @return the populated field descriptor
 * @throws RuntimeException if the category is not PRIMITIVE, LIST, MAP, STRUCT or UNION
 */
private Field createField(TypeInfo typeInfo) {
    final Field field = new Field();
    final Category category = typeInfo.getCategory();
    field.category = category;
    field.typeInfo = typeInfo;
    if (category == Category.PRIMITIVE) {
        field.isPrimitive = true;
        field.primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
    } else {
        field.isPrimitive = false;
        field.objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
        switch(category) {
            case LIST:
                field.children = new Field[1];
                field.children[0] = createField(((ListTypeInfo) typeInfo).getListElementTypeInfo());
                break;
            case MAP:
                // children[0] describes the key type, children[1] the value type
                field.children = new Field[2];
                field.children[0] = createField(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
                field.children[1] = createField(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
                break;
            case STRUCT:
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                field.children = createFields(fieldTypeInfos.toArray(new TypeInfo[fieldTypeInfos.size()]));
                break;
            case UNION:
                UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                field.children = createFields(objectTypeInfos.toArray(new TypeInfo[objectTypeInfos.size()]));
                break;
            default:
                // Name the offending category so the failure can be diagnosed from the stack trace alone.
                throw new RuntimeException("Unexpected category " + category);
        }
        field.count = field.children.length;
    }
    return field;
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 59 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class VectorizationContext method getStructFieldIndex.

/**
 * Resolves the position of a struct field so it can be looked up in
 * StructColumnVector.fields[index].
 * The position is found by walking StructTypeInfo.getAllStructFieldNames() in order and
 * returning the index of the first name equal to the requested field name.
 *
 * @throws HiveException if no struct field has the requested name
 */
private int getStructFieldIndex(ExprNodeFieldDesc exprNodeFieldDesc) throws HiveException {
    final String fieldName = exprNodeFieldDesc.getFieldName();
    final ExprNodeDesc structNodeDesc = exprNodeFieldDesc.getDesc();
    final StructTypeInfo structTypeInfo = (StructTypeInfo) structNodeDesc.getTypeInfo();
    int position = 0;
    for (String candidate : structTypeInfo.getAllStructFieldNames()) {
        if (fieldName.equals(candidate)) {
            // First match wins.
            return position;
        }
        position++;
    }
    // Fell through the whole list without a match.
    throw new HiveException("Could not vectorize expression:" + exprNodeFieldDesc.toString() + ", the field " + fieldName + " doesn't exist.");
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 60 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class VectorVerifyFast method serializeWrite.

/**
 * Writes {@code object} to {@code serializeWrite} as a value of type {@code typeInfo}.
 *
 * <p>A {@code null} object is written as a null. Primitive values are unwrapped from their
 * writable wrappers and written with the matching typed write call. Lists, maps, structs and
 * unions are written with their begin/separate/finish calls and their elements are written by
 * recursing into this method.
 *
 * @param serializeWrite the sink to write to
 * @param typeInfo       the type that determines how {@code object} is cast and written
 * @param object         the value to write; may be {@code null}
 * @throws IOException declared for the underlying write calls
 * @throws Error if the category or primitive category is not handled here
 */
public static void serializeWrite(SerializeWrite serializeWrite, TypeInfo typeInfo, Object object) throws IOException {
    if (object == null) {
        serializeWrite.writeNull();
        return;
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                switch(primitiveTypeInfo.getPrimitiveCategory()) {
                    case BOOLEAN:
                        {
                            boolean value = ((BooleanWritable) object).get();
                            serializeWrite.writeBoolean(value);
                        }
                        break;
                    case BYTE:
                        {
                            byte value = ((ByteWritable) object).get();
                            serializeWrite.writeByte(value);
                        }
                        break;
                    case SHORT:
                        {
                            short value = ((ShortWritable) object).get();
                            serializeWrite.writeShort(value);
                        }
                        break;
                    case INT:
                        {
                            int value = ((IntWritable) object).get();
                            serializeWrite.writeInt(value);
                        }
                        break;
                    case LONG:
                        {
                            long value = ((LongWritable) object).get();
                            serializeWrite.writeLong(value);
                        }
                        break;
                    case FLOAT:
                        {
                            float value = ((FloatWritable) object).get();
                            serializeWrite.writeFloat(value);
                        }
                        break;
                    case DOUBLE:
                        {
                            double value = ((DoubleWritable) object).get();
                            serializeWrite.writeDouble(value);
                        }
                        break;
                    case STRING:
                        {
                            Text value = (Text) object;
                            // Text.getBytes() returns the backing array, which can be longer than
                            // the valid data; only the first getLength() bytes belong to the string.
                            byte[] stringBytes = value.getBytes();
                            int stringLength = value.getLength();
                            serializeWrite.writeString(stringBytes, 0, stringLength);
                        }
                        break;
                    case CHAR:
                        {
                            HiveChar value = ((HiveCharWritable) object).getHiveChar();
                            serializeWrite.writeHiveChar(value);
                        }
                        break;
                    case VARCHAR:
                        {
                            HiveVarchar value = ((HiveVarcharWritable) object).getHiveVarchar();
                            serializeWrite.writeHiveVarchar(value);
                        }
                        break;
                    case DECIMAL:
                        {
                            HiveDecimal value = ((HiveDecimalWritable) object).getHiveDecimal();
                            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                            serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
                        }
                        break;
                    case DATE:
                        {
                            Date value = ((DateWritable) object).get();
                            serializeWrite.writeDate(value);
                        }
                        break;
                    case TIMESTAMP:
                        {
                            Timestamp value = ((TimestampWritable) object).getTimestamp();
                            serializeWrite.writeTimestamp(value);
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
                            serializeWrite.writeHiveIntervalYearMonth(value);
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
                            serializeWrite.writeHiveIntervalDayTime(value);
                        }
                        break;
                    case BINARY:
                        {
                            BytesWritable byteWritable = (BytesWritable) object;
                            byte[] binaryBytes = byteWritable.getBytes();
                            int length = byteWritable.getLength();
                            serializeWrite.writeBinary(binaryBytes, 0, length);
                        }
                        break;
                    default:
                        throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
                }
            }
            break;
        case LIST:
            {
                ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
                TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
                ArrayList<Object> elements = (ArrayList<Object>) object;
                serializeWrite.beginList(elements);
                boolean isFirst = true;
                for (Object elementObject : elements) {
                    // Separator goes between elements, never before the first one.
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateList();
                    }
                    if (elementObject == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, elementTypeInfo, elementObject);
                    }
                }
                serializeWrite.finishList();
            }
            break;
        case MAP:
            {
                MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
                TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
                TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
                HashMap<Object, Object> hashMap = (HashMap<Object, Object>) object;
                serializeWrite.beginMap(hashMap);
                boolean isFirst = true;
                for (Map.Entry<Object, Object> entry : hashMap.entrySet()) {
                    // Pair separator goes between entries, never before the first one.
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateKeyValuePair();
                    }
                    if (entry.getKey() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, keyTypeInfo, entry.getKey());
                    }
                    // Key separator sits between the key and its value.
                    serializeWrite.separateKey();
                    if (entry.getValue() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, valueTypeInfo, entry.getValue());
                    }
                }
                serializeWrite.finishMap();
            }
            break;
        case STRUCT:
            {
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                ArrayList<Object> fieldValues = (ArrayList<Object>) object;
                final int size = fieldValues.size();
                serializeWrite.beginStruct(fieldValues);
                boolean isFirst = true;
                for (int i = 0; i < size; i++) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateStruct();
                    }
                    // Null field values are handled by the null check at the top of the recursive call.
                    serializeWrite(serializeWrite, fieldTypeInfos.get(i), fieldValues.get(i));
                }
                serializeWrite.finishStruct();
            }
            break;
        case UNION:
            {
                UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                StandardUnionObjectInspector.StandardUnion standardUnion = (StandardUnionObjectInspector.StandardUnion) object;
                // The tag selects which of the union's member types the held object has.
                byte tag = standardUnion.getTag();
                serializeWrite.beginUnion(tag);
                serializeWrite(serializeWrite, fieldTypeInfos.get(tag), standardUnion.getObject());
                serializeWrite.finishUnion();
            }
            break;
        default:
            throw new Error("Unknown category " + typeInfo.getCategory().name());
    }
}
Also used : StandardUnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector) HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ArrayList(java.util.ArrayList) List(java.util.List) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 
UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Aggregations

StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)66 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)56 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)40 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)37 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)37 ArrayList (java.util.ArrayList)32 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)23 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)17 List (java.util.List)16 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)16 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)15 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)14 IntWritable (org.apache.hadoop.io.IntWritable)13 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)12 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)12 Text (org.apache.hadoop.io.Text)12 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)11 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)11 BytesWritable (org.apache.hadoop.io.BytesWritable)11 LongWritable (org.apache.hadoop.io.LongWritable)11