
Example 91 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

From the class ColumnMappings, method setHiveColumnDescription.

void setHiveColumnDescription(String serdeName, List<String> columnNames, List<TypeInfo> columnTypes) throws SerDeException {
    if (columnsMapping.length != columnNames.size()) {
        throw new SerDeException(serdeName + ": columns has " + columnNames.size() + " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" + " (counting the key if implicit)");
    }
    // check that each "column-family:" mapping targets a Map whose key extends
    // LazyPrimitive<?, ?> and thus has type Category.PRIMITIVE
    for (int i = 0; i < columnNames.size(); i++) {
        ColumnMapping colMap = columnsMapping[i];
        colMap.columnName = columnNames.get(i);
        colMap.columnType = columnTypes.get(i);
        if (colMap.qualifierName == null && !colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
            TypeInfo typeInfo = columnTypes.get(i);
            if ((typeInfo.getCategory() != ObjectInspector.Category.MAP) || (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory() != ObjectInspector.Category.PRIMITIVE)) {
                throw new SerDeException(serdeName + ": hbase column family '" + colMap.familyName + "' should be mapped to Map<? extends LazyPrimitive<?, ?>,?>, that is " + "the Key for the map should be of primitive type, but is mapped to " + typeInfo.getTypeName());
            }
        }
        if (colMap.hbaseTimestamp) {
            TypeInfo typeInfo = columnTypes.get(i);
            if (!colMap.isCategory(PrimitiveCategory.TIMESTAMP) && !colMap.isCategory(PrimitiveCategory.LONG)) {
                throw new SerDeException(serdeName + ": timestamp columns should be of " + "timestamp or bigint type, but is mapped to " + typeInfo.getTypeName());
            }
        }
    }
}
Also used : MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
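
To see the constraint this method enforces in isolation, here is a minimal, self-contained sketch. It is not part of the Hive source above; the type strings and class name are illustrative. A type mapped to an HBase column family is accepted only if it is a MAP whose key type is primitive.

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class FamilyMappingCheckSketch {
    public static void main(String[] args) {
        // Two candidate Hive types for a "column-family:" mapping:
        // map<string,int> has a primitive key; map<array<int>,int> does not.
        List<TypeInfo> types =
                TypeInfoUtils.getTypeInfosFromTypeString("map<string,int>,map<array<int>,int>");
        for (TypeInfo typeInfo : types) {
            boolean acceptable =
                    typeInfo.getCategory() == ObjectInspector.Category.MAP
                            && ((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory()
                                    == ObjectInspector.Category.PRIMITIVE;
            System.out.println(typeInfo.getTypeName() + " -> " + (acceptable ? "accepted" : "rejected"));
        }
    }
}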

Example 92 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

From the class VectorPartitionConversion, method isImplicitVectorColumnConversion.

public static boolean isImplicitVectorColumnConversion(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) {
    if (fromTypeInfo.getCategory() == Category.PRIMITIVE && toTypeInfo.getCategory() == Category.PRIMITIVE) {
        PrimitiveCategory fromPrimitiveCategory = ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory();
        PrimitiveCategory toPrimitiveCategory = ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory();
        PrimitiveCategory[] toPrimitiveCategories = implicitPrimitiveMap.get(fromPrimitiveCategory);
        if (toPrimitiveCategories != null) {
            for (PrimitiveCategory candidatePrimitiveCategory : toPrimitiveCategories) {
                if (candidatePrimitiveCategory == toPrimitiveCategory) {
                    return true;
                }
            }
        }
        return false;
    }
    return false;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
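
The lookup-table pattern above can be exercised on its own. The sketch below is an assumption-laden miniature, not Hive's actual conversion table: it seeds implicitPrimitiveMap with a single illustrative entry (int widening to bigint) and applies the same PrimitiveCategory comparison.

import java.util.EnumMap;
import java.util.Map;

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ImplicitConversionSketch {
    // Illustrative stand-in for the real implicitPrimitiveMap; only one assumed entry.
    private static final Map<PrimitiveCategory, PrimitiveCategory[]> implicitPrimitiveMap =
            new EnumMap<>(PrimitiveCategory.class);

    static {
        // Assumption for the sketch: int converts implicitly to bigint (LONG).
        implicitPrimitiveMap.put(PrimitiveCategory.INT,
                new PrimitiveCategory[] { PrimitiveCategory.LONG });
    }

    // Same shape as the method above: look up the source category, scan the targets.
    static boolean isImplicit(PrimitiveTypeInfo from, PrimitiveTypeInfo to) {
        PrimitiveCategory[] targets = implicitPrimitiveMap.get(from.getPrimitiveCategory());
        if (targets == null) {
            return false;
        }
        for (PrimitiveCategory candidate : targets) {
            if (candidate == to.getPrimitiveCategory()) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(isImplicit(TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo));  // true
        System.out.println(isImplicit(TypeInfoFactory.longTypeInfo, TypeInfoFactory.intTypeInfo));  // false
    }
}

Storing arrays of target categories keyed by source category keeps the check to a map lookup plus a short scan, which presumably matters on the per-column vectorization path.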

Example 93 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

From the class FetchOperator, method getRecordReader.

private RecordReader<WritableComparable, Writable> getRecordReader() throws Exception {
    if (!iterSplits.hasNext()) {
        FetchInputFormatSplit[] splits = getNextSplits();
        if (splits == null) {
            return null;
        }
        if (!isPartitioned || convertedOI == null) {
            currSerDe = tableSerDe;
            ObjectConverter = null;
        } else {
            currSerDe = needConversion(currDesc) ? currDesc.getDeserializer(job) : tableSerDe;
            ObjectInspector inputOI = currSerDe.getObjectInspector();
            ObjectConverter = ObjectInspectorConverters.getConverter(inputOI, convertedOI);
        }
        if (isPartitioned) {
            row[1] = createPartValue(currDesc, partKeyOI);
        }
        iterSplits = Arrays.asList(splits).iterator();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Creating fetchTask with deserializer typeinfo: " + currSerDe.getObjectInspector().getTypeName());
            LOG.debug("deserializer properties:\ntable properties: " + currDesc.getTableDesc().getProperties() + "\npartition properties: " + currDesc.getProperties());
        }
    }
    final FetchInputFormatSplit target = iterSplits.next();
    @SuppressWarnings("unchecked") final RecordReader<WritableComparable, Writable> reader = target.getRecordReader(job);
    if (hasVC || work.getSplitSample() != null) {
        currRecReader = new HiveRecordReader<WritableComparable, Writable>(reader, job) {

            @Override
            public boolean doNext(WritableComparable key, Writable value) throws IOException {
                // if the current read position is past the shrinked length computed
                // for each split by table sampling, stop fetching any more (early exit)
                if (target.shrinkedLength > 0 && context.getIoCxt().getCurrentBlockStart() > target.shrinkedLength) {
                    return false;
                }
                return super.doNext(key, value);
            }
        };
        ((HiveContextAwareRecordReader) currRecReader).initIOContext(target, job, target.inputFormat.getClass(), reader);
    } else {
        currRecReader = reader;
    }
    key = currRecReader.createKey();
    value = currRecReader.createValue();
    headerCount = footerCount = 0;
    return currRecReader;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveContextAwareRecordReader(org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader) WritableComparable(org.apache.hadoop.io.WritableComparable) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)
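
The TypeInfo-related step here is the ObjectInspector conversion installed when a partition's deserializer schema differs from the table schema. Below is a minimal sketch of that conversion with made-up types (a partition column stored as string, a table schema declaring int); the class name is illustrative.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ConverterSketch {
    public static void main(String[] args) {
        // Partition data was written as string, but the table schema declares int.
        ObjectInspector partitionOI =
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.stringTypeInfo);
        ObjectInspector tableOI =
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.intTypeInfo);

        // Same kind of converter the operator installs to bridge the two schemas per row.
        Converter converter = ObjectInspectorConverters.getConverter(partitionOI, tableOI);
        Object converted = converter.convert("42");
        System.out.println(converted + " (" + converted.getClass().getSimpleName() + ")");
    }
}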

Example 94 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project phoenix by apache.

From the class PhoenixSerDe, method createLazyPhoenixInspector.

private ObjectInspector createLazyPhoenixInspector(Configuration conf, Properties tbl) throws SerDeException {
    List<String> columnNameList = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(PhoenixStorageHandlerConstants.COMMA));
    List<TypeInfo> columnTypeList = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    List<ObjectInspector> columnObjectInspectors = Lists.newArrayListWithExpectedSize(columnTypeList.size());
    for (TypeInfo typeInfo : columnTypeList) {
        columnObjectInspectors.add(PhoenixObjectInspectorFactory.createObjectInspector(typeInfo, serdeParams));
    }
    return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNameList, columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams, ObjectInspectorOptions.JAVA);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
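
A minimal sketch of how the LIST_COLUMNS / LIST_COLUMN_TYPES table properties consumed above are typically turned into column names and TypeInfos; the property values and class name are made up for illustration.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ColumnPropertiesSketch {
    public static void main(String[] args) {
        // Stand-in for the table properties a SerDe receives at initialization.
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,created");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string,timestamp");

        List<String> columnNames =
                Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
        List<TypeInfo> columnTypes =
                TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));

        for (int i = 0; i < columnNames.size(); i++) {
            System.out.println(columnNames.get(i) + " : " + columnTypes.get(i).getTypeName());
        }
    }
}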

Example 95 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project drill by apache.

From the class HiveAbstractReader, method init.

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);
    // Get the configured default val
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
    Properties tableProperties;
    try {
        tableProperties = HiveUtilities.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);
        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);
        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
            partitionOI = getStructOI(partitionSerDe);
            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }
        if (logger.isTraceEnabled()) {
            for (StructField field : finalOI.getAllStructFieldRefs()) {
                logger.trace("field in finalOI: {}", field.getClass().getName());
            }
            logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
        }
        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }
        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
        for (String columnName : selectedColumnNames) {
            StructField fieldRef = finalOI.getStructFieldRef(columnName);
            selectedStructFieldRefs.add(fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }
        for (int i = 0; i < selectedColumnNames.size(); ++i) {
            logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
        }
        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);
                if (partition != null) {
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
    }
    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        internalInit(tableProperties, reader);
    }
}
Also used : SerDe(org.apache.hadoop.hive.serde2.SerDe) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ExecutionException(java.util.concurrent.ExecutionException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SchemaPath(org.apache.drill.common.expression.SchemaPath) JobConf(org.apache.hadoop.mapred.JobConf)
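
A minimal sketch of the TypeInfo round trip at the heart of this reader setup: derive a StructTypeInfo back from an ObjectInspector and read per-field names and types from it. The struct layout and class name are illustrative, and a standard Java ObjectInspector stands in for the finalOI obtained from the table or partition SerDe.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructTypeInfoSketch {
    public static void main(String[] args) {
        // Illustrative schema; a standard Java inspector stands in for the SerDe's finalOI.
        TypeInfo structType =
                TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string,ts:timestamp>");
        ObjectInspector oi =
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(structType);

        // Same round trip as in init(): from the ObjectInspector back to a StructTypeInfo.
        StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
        for (String name : sTypeInfo.getAllStructFieldNames()) {
            TypeInfo fieldType = sTypeInfo.getStructFieldTypeInfo(name);
            System.out.println(name + " -> " + fieldType.getTypeName());
        }
    }
}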

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 292 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 181 usages
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 144 usages
ArrayList (java.util.ArrayList): 124 usages
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 97 usages
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 91 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 89 usages
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 77 usages
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo): 61 usages
Test (org.junit.Test): 54 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 53 usages
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 50 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 44 usages
Text (org.apache.hadoop.io.Text): 41 usages
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 39 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 36 usages
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 35 usages
List (java.util.List): 33 usages
HashMap (java.util.HashMap): 32 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 32 usages