Example 51 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project cdap by caskdata.

the class ObjectDeserializer method deserializeField.

/**
   * Translate a field that fits a {@link Schema} field into a type that Hive understands.
   * For example, a ByteBuffer is allowed by schema but Hive only understands byte arrays, so all ByteBuffers must
   * be changed into byte arrays. Reflection is used to examine java objects if the expected hive type is a struct.
   *
   * @param field value of the field to deserialize.
   * @param typeInfo type of the field as expected by Hive.
   * @param schema schema of the field.
   * @return translated field.
   * @throws NoSuchFieldException if a struct field was expected but not found in the object.
   * @throws IllegalAccessException if a struct field was not accessible.
   */
private Object deserializeField(Object field, TypeInfo typeInfo, Schema schema) throws NoSuchFieldException, IllegalAccessException {
    boolean isNullable = schema.isNullable();
    if (field == null) {
        if (isNullable) {
            return null;
        } else {
            throw new UnexpectedFormatException("Non-nullable field was null.");
        }
    }
    if (isNullable) {
        schema = schema.getNonNullable();
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            return deserializePrimitive(field, (PrimitiveTypeInfo) typeInfo);
        case LIST:
            // HIVE!! some versions will turn bytes into array<tinyint> instead of binary... so special case it.
            // TODO: remove once CDAP-1556 is done
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            if (isByteArray(listTypeInfo) && !(field instanceof Collection)) {
                return deserializeByteArray(field);
            }
            return deserializeList(field, listTypeInfo, schema.getComponentSchema());
        case MAP:
            return deserializeMap(field, (MapTypeInfo) typeInfo, schema.getMapSchema());
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            ArrayList<String> innerFieldNames = structTypeInfo.getAllStructFieldNames();
            ArrayList<TypeInfo> innerFieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
            return flattenRecord(field, innerFieldNames, innerFieldTypes, schema);
        case UNION:
            // TODO: decide what to do here
            return field;
    }
    return null;
}
Also used : ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) UnexpectedFormatException(co.cask.cdap.api.data.format.UnexpectedFormatException) Collection(java.util.Collection) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
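
The ByteBuffer-to-byte-array translation described in the Javadoc above is worth spelling out, since draining a ByteBuffer has to respect its current position and remaining length. Below is a minimal, hypothetical sketch of what a helper like deserializeByteArray might do; the method body is an assumption for illustration, not the CDAP source.

import java.nio.ByteBuffer;

// Hypothetical sketch: convert a schema-level binary field into the byte[]
// that Hive expects. The value may arrive as a ByteBuffer or already as byte[].
private static byte[] toHiveBinary(Object field) {
    if (field instanceof byte[]) {
        return (byte[]) field;
    }
    if (field instanceof ByteBuffer) {
        // Duplicate so the caller's buffer position is left untouched.
        ByteBuffer buffer = ((ByteBuffer) field).duplicate();
        byte[] bytes = new byte[buffer.remaining()];
        buffer.get(bytes);
        return bytes;
    }
    throw new IllegalArgumentException("Expected byte[] or ByteBuffer, got " + field.getClass());
}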

Example 52 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class ExprNodeConverter method visitFieldAccess.

/**
 * Handles expr like struct(key,value).key
 * Follows the same rules as TypeCheckProcFactory::getXpathOrFuncExprNodeDesc(),
 * which is the equivalent version of parsing such an expression from the AST.
 *
 * TODO: Handle 1) cast, 2) windowing agg call
 */
@Override
public ExprNodeDesc visitFieldAccess(RexFieldAccess fieldAccess) {
    ExprNodeDesc parent = fieldAccess.getReferenceExpr().accept(this);
    String child = fieldAccess.getField().getName();
    TypeInfo parentType = parent.getTypeInfo();
    // Allow accessing a field of list element structs directly from a list
    boolean isList = (parentType.getCategory() == ObjectInspector.Category.LIST);
    if (isList) {
        parentType = ((ListTypeInfo) parentType).getListElementTypeInfo();
    }
    TypeInfo t = ((StructTypeInfo) parentType).getStructFieldTypeInfo(child);
    return new ExprNodeFieldDesc(t, parent, child, isList);
}
Also used : ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DateString(org.apache.calcite.util.DateString) TimestampString(org.apache.calcite.util.TimestampString) TimeString(org.apache.calcite.util.TimeString) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
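
For context, the getStructFieldTypeInfo lookup at the heart of this method resolves a field name against a struct type, which is what makes an expression like struct(key,value).key work. A short hedged sketch of that lookup in isolation (the field names and types are illustrative):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Build struct<key:string,value:int> and resolve the type of its "key" field,
// mirroring what visitFieldAccess does with the parent expression's TypeInfo.
StructTypeInfo struct = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
        Arrays.asList("key", "value"),
        Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo));
TypeInfo keyType = struct.getStructFieldTypeInfo("key"); // string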

Example 53 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class VectorizedParquetRecordReader method buildVectorizedParquetReader.

// Build VectorizedParquetColumnReader via Hive typeInfo and Parquet schema
private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pages, List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, int depth) throws IOException {
    List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors);
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            if (fileSchema.getColumns().contains(descriptors.get(0))) {
                return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo);
            } else {
                // Support for schema evolution
                return new VectorizedDummyColumnReader();
            }
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
            List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
            List<Type> types = type.asGroupType().getFields();
            for (int i = 0; i < fieldTypes.size(); i++) {
                VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, skipTimestampConversion, depth + 1);
                if (r != null) {
                    fieldReaders.add(r);
                } else {
                    throw new RuntimeException("Fail to build Parquet vectorized reader based on Hive type " + fieldTypes.get(i).getTypeName() + " and Parquet type" + types.get(i).toString());
                }
            }
            return new VectorizedStructColumnReader(fieldReaders);
        case LIST:
            checkListColumnSupport(((ListTypeInfo) typeInfo).getListElementTypeInfo());
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            return new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, getElementType(type), typeInfo);
        case MAP:
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            // Handle the two map layouts Parquet allows, e.g.:
            // definition with 1 group:
            //   repeated group map (MAP_KEY_VALUE)
            //     { required binary key (UTF8); optional binary value (UTF8); }
            // definition with 2 groups:
            //   optional group m1 (MAP) {
            //     repeated group map (MAP_KEY_VALUE)
            //       { required binary key (UTF8); optional binary value (UTF8); }
            //   }
            int nestGroup = 0;
            GroupType groupType = type.asGroupType();
            // Unwrap single-field wrapper groups until the key/value pair (2 fields) is
            // reached, giving up after MAP_DEFINITION_LEVEL_MAX levels.
            while (groupType.getFieldCount() < 2) {
                if (nestGroup > MAP_DEFINITION_LEVEL_MAX) {
                    throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, " + "Failed to get the field types for Map with type " + type);
                }
                groupType = groupType.getFields().get(0).asGroupType();
                nestGroup++;
            }
            List<Type> kvTypes = groupType.getFields();
            VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, kvTypes.get(0), typeInfo);
            VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, kvTypes.get(1), typeInfo);
            return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
        case UNION:
        default:
            throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveType(org.apache.parquet.schema.PrimitiveType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) ParquetRuntimeException(org.apache.parquet.ParquetRuntimeException)
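
The one-group and two-group map layouts described in the comments above can be reproduced with Parquet's schema parser; this is exactly the nesting the while-loop unwraps. A hedged sketch (the schema string is illustrative):

import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Two-group layout: the map column wraps the key/value pair in an extra group,
// which is why buildVectorizedParquetReader keeps descending until it sees 2 fields.
MessageType schema = MessageTypeParser.parseMessageType(
        "message doc { optional group m1 (MAP) {"
        + " repeated group map (MAP_KEY_VALUE) {"
        + " required binary key (UTF8); optional binary value (UTF8); } } }");
GroupType mapColumn = schema.getFields().get(0).asGroupType();   // m1: 1 field
GroupType keyValue = mapColumn.getFields().get(0).asGroupType(); // map: key + value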

Example 54 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class SampleHBasePredicateDecomposer method getScanRange.

@Override
public HBaseScanRange getScanRange(List<IndexSearchCondition> searchConditions) throws Exception {
    Map<String, List<IndexSearchCondition>> fieldConds = new HashMap<String, List<IndexSearchCondition>>();
    for (IndexSearchCondition condition : searchConditions) {
        String fieldName = condition.getFields()[0];
        List<IndexSearchCondition> fieldCond = fieldConds.get(fieldName);
        if (fieldCond == null) {
            fieldConds.put(fieldName, fieldCond = new ArrayList<IndexSearchCondition>());
        }
        fieldCond.add(condition);
    }
    List<Filter> filters = new ArrayList<Filter>();
    HBaseScanRange range = new HBaseScanRange();
    StructTypeInfo type = (StructTypeInfo) keyMapping.columnType;
    for (String name : type.getAllStructFieldNames()) {
        List<IndexSearchCondition> fieldCond = fieldConds.get(name);
        if (fieldCond == null || fieldCond.size() > 2) {
            continue;
        }
        for (IndexSearchCondition condition : fieldCond) {
            if (condition.getConstantDesc().getValue() == null) {
                continue;
            }
            String comparisonOp = condition.getComparisonOp();
            String constantVal = String.valueOf(condition.getConstantDesc().getValue());
            byte[] valueAsBytes = toBinary(constantVal, FIXED_LENGTH, false, false);
            if (comparisonOp.endsWith("UDFOPEqualOrGreaterThan")) {
                filters.add(new RowFilter(CompareOp.GREATER_OR_EQUAL, new BinaryComparator(valueAsBytes)));
            } else if (comparisonOp.endsWith("UDFOPGreaterThan")) {
                filters.add(new RowFilter(CompareOp.GREATER, new BinaryComparator(valueAsBytes)));
            } else if (comparisonOp.endsWith("UDFOPEqualOrLessThan")) {
                filters.add(new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(valueAsBytes)));
            } else if (comparisonOp.endsWith("UDFOPLessThan")) {
                filters.add(new RowFilter(CompareOp.LESS, new BinaryComparator(valueAsBytes)));
            } else {
                throw new IOException(comparisonOp + " is not a supported comparison operator");
            }
        }
    }
    if (!filters.isEmpty()) {
        range.addFilter(new FilterList(Operator.MUST_PASS_ALL, filters));
    }
    return range;
}
Also used : HashMap(java.util.HashMap) IndexSearchCondition(org.apache.hadoop.hive.ql.index.IndexSearchCondition) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) FilterList(org.apache.hadoop.hbase.filter.FilterList) IOException(java.io.IOException) BinaryComparator(org.apache.hadoop.hbase.filter.BinaryComparator) RowFilter(org.apache.hadoop.hbase.filter.RowFilter) Filter(org.apache.hadoop.hbase.filter.Filter) List(java.util.List)
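
The chain of endsWith checks above maps UDF operator names onto HBase CompareOp values; the same dispatch could be table-driven. A hedged alternative sketch, not the Hive source, just a design variation:

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;

// Hypothetical suffix-to-operator table replacing the if/else chain.
private static final Map<String, CompareOp> COMPARE_OPS = new LinkedHashMap<>();
static {
    COMPARE_OPS.put("UDFOPEqualOrGreaterThan", CompareOp.GREATER_OR_EQUAL);
    COMPARE_OPS.put("UDFOPGreaterThan", CompareOp.GREATER);
    COMPARE_OPS.put("UDFOPEqualOrLessThan", CompareOp.LESS_OR_EQUAL);
    COMPARE_OPS.put("UDFOPLessThan", CompareOp.LESS);
}

private static CompareOp resolveCompareOp(String comparisonOp) throws IOException {
    for (Map.Entry<String, CompareOp> entry : COMPARE_OPS.entrySet()) {
        if (comparisonOp.endsWith(entry.getKey())) {
            return entry.getValue();
        }
    }
    throw new IOException(comparisonOp + " is not a supported comparison operator");
}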

Example 55 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class HCatRecordObjectInspectorFactory method getHCatRecordObjectInspector.

/**
 * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into
 * @param typeInfo Type definition for the record to look into
 * @return appropriate HCatRecordObjectInspector
 * @throws SerDeException if the supplied type definition is not a struct type
 */
public static HCatRecordObjectInspector getHCatRecordObjectInspector(StructTypeInfo typeInfo) throws SerDeException {
    HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo);
    if (oi == null) {
        LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName());
        switch(typeInfo.getCategory()) {
            case STRUCT:
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
                List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
                for (int i = 0; i < fieldTypeInfos.size(); i++) {
                    fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
                }
                oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors);
                break;
            default:
                // anything else is an error: HCatRecord requires a struct type, so throw.
                throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() + "] was not of struct type - HCatRecord expected struct type, got [" + typeInfo.getCategory().toString() + "]");
        }
        cachedHCatRecordObjectInspectors.put(typeInfo, oi);
    }
    return oi;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
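
A short usage note: the StructTypeInfo handed to this factory is typically derived from a Hive type string via TypeInfoUtils. A hedged sketch (the column spec is illustrative):

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Parse a struct type string and ask the factory for the matching inspector;
// getHCatRecordObjectInspector throws SerDeException for non-struct types.
StructTypeInfo recordType = (StructTypeInfo) TypeInfoUtils
        .getTypeInfoFromTypeString("struct<name:string,age:int>");
HCatRecordObjectInspector oi =
        HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(recordType);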

Aggregations

StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 66 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 56 usages
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 40 usages
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 37 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 37 usages
ArrayList (java.util.ArrayList): 32 usages
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo): 23 usages
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 17 usages
List (java.util.List): 16 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 16 usages
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 15 usages
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 14 usages
IntWritable (org.apache.hadoop.io.IntWritable): 13 usages
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 12 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 12 usages
Text (org.apache.hadoop.io.Text): 12 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 11 usages
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 11 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 11 usages
LongWritable (org.apache.hadoop.io.LongWritable): 11 usages