
Example 1 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

From the class HBaseUtils, method desierliazeDbNameTableNameFromPartitionKey.

private static List<String> desierliazeDbNameTableNameFromPartitionKey(byte[] key, Configuration conf) {
    StringBuffer names = new StringBuffer();
    names.append("dbName,tableName,");
    StringBuffer types = new StringBuffer();
    types.append("string,string,");
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
    try {
        serDe.initialize(conf, props);
        List deserializedkeys = ((List) serDe.deserialize(new BytesWritable(key))).subList(0, 2);
        List<String> keys = new ArrayList<>();
        for (int i = 0; i < deserializedkeys.size(); i++) {
            Object deserializedKey = deserializedkeys.get(i);
            if (deserializedKey == null) {
                throw new RuntimeException("Can't have a null dbname or tablename");
            } else {
                TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string");
                ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
                Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
                keys.add((String) converter.convert(deserializedKey));
            }
        }
        return keys;
    } catch (SerDeException e) {
        throw new RuntimeException("Error when deserialize key", e);
    }
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe), ArrayList (java.util.ArrayList), BytesWritable (org.apache.hadoop.io.BytesWritable), ByteString (com.google.protobuf.ByteString), Properties (java.util.Properties), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter), List (java.util.List), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
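The conversion step in this example can be exercised on its own: the writable object inspector returned for the "string" type info inspects Text values, and the converter turns them into plain java.lang.String objects. The following is a minimal sketch under that assumption; the class name and the sample Text value are hypothetical and not part of the Hive source.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class WritableToJavaStringSketch {
    public static void main(String[] args) {
        // Writable-backed inspector for the "string" type, as returned by
        // getStandardWritableObjectInspectorFromTypeInfo (it inspects Text objects).
        TypeInfo stringType = TypeInfoUtils.getTypeInfoFromTypeString("string");
        ObjectInspector writableStringOI =
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(stringType);

        // Converter from the writable representation (Text) to a plain java.lang.String,
        // same direction as in desierliazeDbNameTableNameFromPartitionKey above.
        Converter toJavaString = ObjectInspectorConverters.getConverter(
            writableStringOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);

        // "default" is a hypothetical value standing in for one deserialized by BinarySortableSerDe.
        String dbName = (String) toJavaString.convert(new Text("default"));
        System.out.println(dbName);  // prints: default
    }
}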

Example 2 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

From the class DDLTask, method alterTableAlterPart.

/**
 * Alter the type of a partition column in a table.
 *
 * @param db
 *          Database that contains the table whose partition column is altered.
 * @param alterPartitionDesc
 *          Descriptor of the partition column type change.
 * @return 0 when execution succeeds and a value above 0 if it fails.
 * @throws HiveException
 */
private int alterTableAlterPart(Hive db, AlterTableAlterPartDesc alterPartitionDesc) throws HiveException {
    Table tbl = db.getTable(alterPartitionDesc.getTableName(), true);
    // This is checked by DDLSemanticAnalyzer
    assert (tbl.isPartitioned());
    List<FieldSchema> newPartitionKeys = new ArrayList<FieldSchema>();
    // Check if the existing partition values can be type casted to the new column type
    // with a non null value before trying to alter the partition column type.
    try {
        Set<Partition> partitions = db.getAllPartitionsOf(tbl);
        int colIndex = -1;
        for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
            colIndex++;
            if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
                break;
            }
        }
        if (colIndex == -1 || colIndex == tbl.getTTable().getPartitionKeys().size()) {
            throw new HiveException("Cannot find partition column " + alterPartitionDesc.getPartKeySpec().getName());
        }
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(alterPartitionDesc.getPartKeySpec().getType());
        ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
        Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
        // For all the existing partitions, check if the value can be type casted to a non-null object
        for (Partition part : partitions) {
            if (part.getName().equals(conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
                continue;
            }
            try {
                String value = part.getValues().get(colIndex);
                Object convertedValue = converter.convert(value);
                if (convertedValue == null) {
                    throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + value + " resulted in NULL object");
                }
            } catch (Exception e) {
                throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + part.getValues().get(colIndex));
            }
        }
    } catch (Exception e) {
        throw new HiveException("Exception while checking type conversion of existing partition values to " + alterPartitionDesc.getPartKeySpec() + " : " + e.getMessage());
    }
    for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
        if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
            newPartitionKeys.add(alterPartitionDesc.getPartKeySpec());
        } else {
            newPartitionKeys.add(col);
        }
    }
    tbl.getTTable().setPartitionKeys(newPartitionKeys);
    db.alterTable(tbl, null);
    work.getInputs().add(new ReadEntity(tbl));
    // We've already locked the table as the input, don't relock it as the output.
    addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), TextMetaDataTable (org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable), Table (org.apache.hadoop.hive.ql.metadata.Table), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo), SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), CheckConstraint (org.apache.hadoop.hive.ql.metadata.CheckConstraint), NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint), SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint), UniqueConstraint (org.apache.hadoop.hive.ql.metadata.UniqueConstraint), SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException), InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException), NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), URISyntaxException (java.net.URISyntaxException), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), InvalidObjectException (org.apache.hadoop.hive.metastore.api.InvalidObjectException), SQLException (java.sql.SQLException), FileNotFoundException (java.io.FileNotFoundException), HiveAuthzPluginException (org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException), InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException), ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), StatObjectConverter (org.apache.hadoop.hive.metastore.StatObjectConverter), Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter), HivePrivilegeObject (org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject), HiveLockObject (org.apache.hadoop.hive.ql.lockmgr.HiveLockObject), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
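The null check in alterTableAlterPart relies on the converter returning null when a partition value cannot be parsed as the new column type; that is exactly what triggers the "resulted in NULL object" HiveException above. A minimal sketch of that behavior, with a hypothetical class name, target type and sample values:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueCastCheckSketch {
    public static void main(String[] args) {
        // Target type the partition column is being altered to, e.g. "int".
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString("int");
        ObjectInspector outputOI =
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);

        // Same direction as the DDL check: java String in, writable of the target type out.
        Converter converter = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);

        // A parseable partition value converts to a writable of the target type.
        System.out.println(converter.convert("42"));      // 42
        // An unparseable value converts to null, which is what the check above rejects.
        System.out.println(converter.convert("not-int")); // null
    }
}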

Example 3 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

From the class VectorMapOperator, method internalSetChildren.

/*
   * Create information for vector map operator.
   * The member oneRootOperator has been set.
   */
private void internalSetChildren(Configuration hconf) throws Exception {
    // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
    // so set it here to none.
    currentReadType = VectorMapOperatorReadType.NONE;
    batchContext = conf.getVectorizedRowBatchCtx();
    /*
     * Use a different batch for vectorized Input File Format readers so they can do their work
     * overlapped with work of the row collection that vector/row deserialization does.  This allows
     * the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
     */
    vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
    conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
    /*
     * This batch is used by vector/row deserializer readers.
     */
    deserializerBatch = batchContext.createVectorizedRowBatch();
    batchCounter = 0;
    dataColumnCount = batchContext.getDataColumnCount();
    partitionColumnCount = batchContext.getPartitionColumnCount();
    partitionValues = new Object[partitionColumnCount];
    virtualColumnCount = batchContext.getVirtualColumnCount();
    rowIdentifierColumnNum = batchContext.findVirtualColumnNum(VirtualColumn.ROWID);
    hasRowIdentifier = (rowIdentifierColumnNum != -1);
    dataColumnNums = batchContext.getDataColumnNums();
    Preconditions.checkState(dataColumnNums != null);
    // Form a truncated boolean include array for our vector/row deserializers.
    determineDataColumnsToIncludeTruncated();
    /*
     * Create table related objects
     */
    final String[] rowColumnNames = batchContext.getRowColumnNames();
    final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
    tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
    tableStandardStructObjectInspector = (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
    tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
    /*
     * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
     * the included columns (+ partition columns).
     *
     * For now, we need to model the object inspector rows because there are still several
     * vectorized operators that use them.
     *
     * We need to continue to model the Object[] as having null objects for not included columns
     * until the following has been fixed:
     *    o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
     *    o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
     *      use the row super class to process rows.
     */
    /*
     * The Vectorizer class enforces that there is only one TableScanOperator, so
     * we don't need the more complicated multiple root operator mapping that MapOperator has.
     */
    fileToPartitionContextMap = new HashMap<>();
    // Temporary map so we only create one partition context entry.
    HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap = new HashMap<PartitionDesc, VectorPartitionContext>();
    for (Map.Entry<Path, List<String>> entry : conf.getPathToAliases().entrySet()) {
        Path path = entry.getKey();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
        VectorPartitionContext vectorPartitionContext;
        if (!partitionContextMap.containsKey(partDesc)) {
            vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
            partitionContextMap.put(partDesc, vectorPartitionContext);
        } else {
            vectorPartitionContext = partitionContextMap.get(partDesc);
        }
        fileToPartitionContextMap.put(path, vectorPartitionContext);
    }
    // Create list of one.
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
    children.add(oneRootOperator);
    setChildOperators(children);
}
Also used: Path (org.apache.hadoop.fs.Path), Operator (org.apache.hadoop.hive.ql.exec.Operator), AbstractMapOperator (org.apache.hadoop.hive.ql.exec.AbstractMapOperator), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc), VectorPartitionDesc (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc), List (java.util.List), Map (java.util.Map), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)
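For struct type infos, as in the tableStructTypeInfo / tableStandardStructObjectInspector setup above, getStandardWritableObjectInspectorFromTypeInfo returns a StandardStructObjectInspector whose fields carry their own writable inspectors. A small sketch under that assumption; the column names and types are made up and merely stand in for batchContext.getRowColumnNames() and getRowColumnTypeInfos().

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TableStructInspectorSketch {
    public static void main(String[] args) {
        // Hypothetical row schema for the table.
        List<String> names = Arrays.asList("id", "name", "amount");
        List<TypeInfo> types = Arrays.asList(
            TypeInfoUtils.getTypeInfoFromTypeString("bigint"),
            TypeInfoUtils.getTypeInfoFromTypeString("string"),
            TypeInfoUtils.getTypeInfoFromTypeString("decimal(10,2)"));

        StructTypeInfo structTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);
        StandardStructObjectInspector structOI = (StandardStructObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structTypeInfo);

        // Each struct field exposes its name and its own writable object inspector.
        for (StructField field : structOI.getAllStructFieldRefs()) {
            System.out.println(field.getFieldName() + " : "
                + field.getFieldObjectInspector().getTypeName());
        }
    }
}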

Example 4 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

From the class AlterTableAlterPartitionOperation, method checkPartitionValues.

/**
 * Check if the existing partition values can be type casted to the new column type
 * with a non null value before trying to alter the partition column type.
 */
private void checkPartitionValues(Table tbl, int colIndex) throws HiveException {
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(desc.getPartKeyType());
    ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    Set<Partition> partitions = context.getDb().getAllPartitionsOf(tbl);
    for (Partition part : partitions) {
        if (part.getName().equals(context.getConf().getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
            continue;
        }
        try {
            String value = part.getValues().get(colIndex);
            Object convertedValue = converter.convert(value);
            if (convertedValue == null) {
                throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + value + " resulted in NULL object");
            }
        } catch (Exception e) {
            throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + part.getValues().get(colIndex));
        }
    }
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 5 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

From the class VectorAssignRow, method initConvertSourceEntry.

/*
   * Initialize one column's source conversion related arrays.
   * Assumes initTargetEntry has already been called.
   */
private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) {
    isConvert[logicalColumnIndex] = true;
    final Category convertSourceCategory = convertSourceTypeInfo.getCategory();
    convertSourceOI[logicalColumnIndex] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(convertSourceTypeInfo);
    if (convertSourceCategory == Category.PRIMITIVE) {
        // These need to be based on the target.
        final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfos[logicalColumnIndex]).getPrimitiveCategory();
        switch(targetPrimitiveCategory) {
            case DATE:
                convertTargetWritables[logicalColumnIndex] = new DateWritableV2();
                break;
            case STRING:
                convertTargetWritables[logicalColumnIndex] = new Text();
                break;
            default:
                // No additional data type specific setting.
                break;
        }
    }
}
Also used: PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory), Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category), DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2), Text (org.apache.hadoop.io.Text), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
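The DATE and STRING cases above pre-allocate the writable that the target column expects (DateWritableV2, Text). A small sketch, assuming a Hive version in which dates use DateWritableV2 as in the example above, showing that the writable inspector returned for a primitive type reports those same writable classes; the class name is hypothetical.

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class WritableClassSketch {
    public static void main(String[] args) {
        PrimitiveObjectInspector dateOI = (PrimitiveObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
                TypeInfoUtils.getTypeInfoFromTypeString("date"));
        PrimitiveObjectInspector stringOI = (PrimitiveObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
                TypeInfoUtils.getTypeInfoFromTypeString("string"));

        // Expected to print org.apache.hadoop.hive.serde2.io.DateWritableV2 and
        // org.apache.hadoop.io.Text on versions matching the example above.
        System.out.println(dateOI.getPrimitiveWritableClass().getName());
        System.out.println(stringOI.getPrimitiveWritableClass().getName());
    }
}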

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 44
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 31
ArrayList (java.util.ArrayList): 22
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 17
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 13
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 12
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 11
ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator): 10
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 10
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 9
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 8
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 8
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 8
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource): 7
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource): 7
GenerationSpec (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec): 7
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 7
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 6
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 6