
Example 51 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From class MapRecordProcessor, the method getKeyValueReader:

private KeyValueReader getKeyValueReader(Collection<KeyValueReader> keyValueReaders, AbstractMapOperator mapOp) throws Exception {
    List<KeyValueReader> kvReaderList = new ArrayList<KeyValueReader>(keyValueReaders);
    // this sets up the map operator contexts correctly
    mapOp.initializeContexts();
    Deserializer deserializer = mapOp.getCurrentDeserializer();
    // deserializer is null in case of VectorMapOperator
    KeyValueReader reader = new KeyValueInputMerger(
            kvReaderList, deserializer,
            new ObjectInspector[] { deserializer == null ? null : deserializer.getObjectInspector() },
            mapOp.getConf().getSortCols());
    return reader;
}
Also used : Deserializer(org.apache.hadoop.hive.serde2.Deserializer), KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader), ArrayList(java.util.ArrayList), KeyValueInputMerger(org.apache.hadoop.hive.ql.exec.tez.tools.KeyValueInputMerger)
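
The merger above tolerates a null deserializer (the VectorMapOperator case). For context, here is a minimal sketch of the Deserializer contract on its own, using LazySimpleSerDe with the three-argument initialize that the later examples also call; the two-column schema and the sample row are made-up values:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

public class DeserializerSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical two-column schema: the property keys are the standard
        // serde2 ones, the values are illustrative.
        Properties props = new Properties();
        props.setProperty("columns", "id,name");
        props.setProperty("columns.types", "int,string");
        props.setProperty("field.delim", ",");

        LazySimpleSerDe serDe = new LazySimpleSerDe();
        serDe.initialize(new Configuration(), props, null);

        // deserialize() turns a serialized Writable into a row object that
        // the ObjectInspector knows how to pick apart.
        Object row = serDe.deserialize(new Text("1,alice"));
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        System.out.println(oi.getStructFieldsDataAsList(row));
    }
}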

Example 52 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From class CreateTableLikeOperation, the method setTableParameters:

private void setTableParameters(Table tbl) throws HiveException {
    Set<String> retainer = new HashSet<String>();
    Class<? extends Deserializer> serdeClass;
    try {
        serdeClass = tbl.getDeserializerClass();
    } catch (Exception e) {
        throw new HiveException(e);
    }
    // We should copy only those table parameters that are specified in the config.
    SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
    // For non-native tables, the storage_handler property should be retained.
    retainer.add(META_TABLE_STORAGE);
    if (spec != null && spec.schemaProps() != null) {
        retainer.addAll(Arrays.asList(spec.schemaProps()));
    }
    String paramsStr = HiveConf.getVar(context.getConf(), HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST);
    if (paramsStr != null) {
        retainer.addAll(Arrays.asList(paramsStr.split(",")));
    }
    Map<String, String> params = tbl.getParameters();
    if (!retainer.isEmpty()) {
        params.keySet().retainAll(retainer);
    } else {
        params.clear();
    }
    if (desc.getTblProps() != null) {
        params.putAll(desc.getTblProps());
    }
}
Also used : SerDeSpec(org.apache.hadoop.hive.serde2.SerDeSpec), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException), HashSet(java.util.HashSet)
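
The retainer logic hinges on the @SerDeSpec annotation. Below is a hedged sketch of both sides of that contract; MySerDe and its property list are hypothetical, while SerDeSpec and AnnotationUtils are the classes used above (the AnnotationUtils import path is assumed from recent Hive):

import org.apache.hadoop.hive.serde2.SerDeSpec;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hive.common.util.AnnotationUtils; // path as in recent Hive; an assumption

// Hypothetical SerDe declaring which table properties belong to its schema.
@SerDeSpec(schemaProps = {"columns", "columns.types", "field.delim"})
class MySerDe extends LazySimpleSerDe {
}

public class SerDeSpecSketch {
    public static void main(String[] args) {
        SerDeSpec spec = AnnotationUtils.getAnnotation(MySerDe.class, SerDeSpec.class);
        if (spec != null && spec.schemaProps() != null) {
            // These are exactly the parameters that CREATE TABLE LIKE retains.
            for (String prop : spec.schemaProps()) {
                System.out.println(prop);
            }
        }
    }
}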

Example 53 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From class DynamicValueRegistryTez, the method init:

@Override
public void init(RegistryConf conf) throws Exception {
    RegistryConfTez rct = (RegistryConfTez) conf;
    for (String inputSourceName : rct.baseWork.getInputSourceToRuntimeValuesInfo().keySet()) {
        LOG.info("Runtime value source: " + inputSourceName);
        LogicalInput runtimeValueInput = rct.inputs.get(inputSourceName);
        RuntimeValuesInfo runtimeValuesInfo = rct.baseWork.getInputSourceToRuntimeValuesInfo().get(inputSourceName);
        // Set up the deserializer and object inspector for the incoming data source
        AbstractSerDe serDe = ReflectionUtils.newInstance(runtimeValuesInfo.getTableDesc().getSerDeClass(), null);
        serDe.initialize(rct.conf, runtimeValuesInfo.getTableDesc().getProperties(), null);
        ObjectInspector inspector = serDe.getObjectInspector();
        // Set up col expressions for the dynamic values using this input
        List<ExprNodeEvaluator> colExprEvaluators = new ArrayList<ExprNodeEvaluator>();
        for (ExprNodeDesc expr : runtimeValuesInfo.getColExprs()) {
            ExprNodeEvaluator exprEval = ExprNodeEvaluatorFactory.get(expr, null);
            exprEval.initialize(inspector);
            colExprEvaluators.add(exprEval);
        }
        runtimeValueInput.start();
        List<Input> inputList = new ArrayList<Input>();
        inputList.add(runtimeValueInput);
        rct.processorContext.waitForAllInputsReady(inputList);
        KeyValueReader kvReader = (KeyValueReader) runtimeValueInput.getReader();
        long rowCount = 0;
        while (kvReader.next()) {
            Object row = serDe.deserialize((Writable) kvReader.getCurrentValue());
            rowCount++;
            for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
                // Read each expression and save it to the value registry
                ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
                Object val = eval.evaluate(row);
                setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), val);
            }
        }
        // For now, expecting a single row (min/max, aggregated bloom filter), or no rows
        if (rowCount == 0) {
            LOG.debug("No input rows from " + inputSourceName + ", filling dynamic values with nulls");
            for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
                // Fill the dynamic values for this input with nulls.
                setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), null);
            }
        } else if (rowCount > 1) {
            throw new IllegalStateException("Expected 0 or 1 rows from " + inputSourceName + ", got " + rowCount);
        }
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader), ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator), ArrayList(java.util.ArrayList), AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe), RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo), LogicalInput(org.apache.tez.runtime.api.LogicalInput), Input(org.apache.tez.runtime.api.Input), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
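
The core of init is the evaluator loop: each ExprNodeEvaluator is bound to the input's ObjectInspector once and then applied per deserialized row. A minimal self-contained sketch of that pattern follows; the one-column row shape and the "key"/"hello" values are illustrative assumptions:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class EvaluatorSketch {
    public static void main(String[] args) throws Exception {
        // Row shape: a single string column named "key".
        StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("key"),
                Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        // Bind the evaluator to the inspector once, then reuse it per row,
        // as the loop above does with kvReader's rows.
        ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(
                new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "", false));
        eval.initialize(rowOI);

        Object row = Arrays.asList("hello");
        System.out.println(eval.evaluate(row));
    }
}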

Example 54 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From class SemanticAnalyzer, the method genConvertCol:

private List<ExprNodeDesc> genConvertCol(String dest, QB qb, TableDesc tableDesc, Operator input, List<Integer> posns, boolean convert) throws SemanticException {
    StructObjectInspector oi = null;
    try {
        AbstractSerDe deserializer = tableDesc.getSerDeClass().newInstance();
        deserializer.initialize(conf, tableDesc.getProperties(), null);
        oi = (StructObjectInspector) deserializer.getObjectInspector();
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
    List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
    // Check column type
    int columnNumber = posns.size();
    List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
    for (Integer posn : posns) {
        ObjectInspector tableFieldOI = tableFields.get(posn).getFieldObjectInspector();
        TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
        TypeInfo rowFieldTypeInfo = rowFields.get(posn).getType();
        ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(posn).getInternalName(), rowFields.get(posn).getTabAlias(), rowFields.get(posn).getIsVirtualCol());
        if (convert && !tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
            // need to do some conversions here
            if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
                // cannot convert to complex types
                column = null;
            } else {
                column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
            }
            if (column == null) {
                String reason = "Cannot convert column " + posn + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
                throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), qb.getParseInfo().getDestForClause(dest), reason));
            }
        }
        expressions.add(column);
    }
    return expressions;
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector), StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector), ArrayList(java.util.ArrayList), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe), LockException(org.apache.hadoop.hive.ql.lockmgr.LockException), IOException(java.io.IOException), CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException), MetaException(org.apache.hadoop.hive.metastore.api.MetaException), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), SerDeException(org.apache.hadoop.hive.serde2.SerDeException), PatternSyntaxException(java.util.regex.PatternSyntaxException), FileNotFoundException(java.io.FileNotFoundException), AccessControlException(java.security.AccessControlException), InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException), SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint), SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
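
The per-column cast decision in genConvertCol can be read in isolation. Here is a hedged sketch of that step using the same call chain the method uses; the ExprNodeTypeCheck import path is an assumption based on recent Hive layouts, and the int-to-string example column is made up:

import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; // path per recent Hive; an assumption
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConversionCastSketch {

    // Mirrors the per-column logic above: return the column unchanged when
    // types match, null when the target is complex (the caller turns that
    // into a TARGET_TABLE_COLUMN_MISMATCH error), and a cast otherwise.
    static ExprNodeDesc castIfNeeded(ExprNodeDesc column, TypeInfo tableFieldTypeInfo)
            throws SemanticException {
        if (tableFieldTypeInfo.equals(column.getTypeInfo())) {
            return column;
        }
        if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
            return null; // cannot convert to complex types
        }
        return ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
                .createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
    }

    public static void main(String[] args) throws SemanticException {
        ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c0", "", false);
        System.out.println(castIfNeeded(col, TypeInfoFactory.stringTypeInfo));
    }
}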

Example 55 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From class SemanticAnalyzer, the method genConversionSelectOperator:

/**
 * Generate the conversion SelectOperator that converts the columns into the
 * types that are expected by the table_desc.
 */
private Operator genConversionSelectOperator(String dest, QB qb, Operator input, Deserializer deserializer, DynamicPartitionCtx dpCtx, List<FieldSchema> parts) throws SemanticException {
    StructObjectInspector oi = null;
    try {
        oi = (StructObjectInspector) deserializer.getObjectInspector();
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    // Check column number
    List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
    boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
    List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
    int inColumnCnt = rowFields.size();
    int outColumnCnt = tableFields.size();
    if (dynPart && dpCtx != null) {
        outColumnCnt += dpCtx.getNumDPCols();
    }
    // The numbers of input columns and output columns should match for regular query
    if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
        String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns.";
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), qb.getParseInfo().getDestForClause(dest), reason));
    }
    // Check column types
    boolean converted = false;
    int columnNumber = tableFields.size();
    List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
    // MetadataTypedColumnsetSerDe needs no cast expressions here because it
    // does the conversion to String by itself.
    if (!(deserializer instanceof MetadataTypedColumnsetSerDe) && !deleting(dest)) {
        // offset by 1 so that we don't try to convert the ROW__ID
        if (updating(dest)) {
            expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true));
        }
        // here only deals with non-partition columns. We deal with partition columns next
        for (int i = 0; i < columnNumber; i++) {
            int rowFieldsOffset = updating(dest) ? i + 1 : i;
            ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
            TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
            TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol());
            // Types must match the table field exactly; otherwise a
            // conversion cast is added, so we still keep the conversion.
            if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
                // need to do some conversions here
                converted = true;
                if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
                    // cannot convert to complex types
                    column = null;
                } else {
                    column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
                }
                if (column == null) {
                    String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
                    throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), qb.getParseInfo().getDestForClause(dest), reason));
                }
            }
            expressions.add(column);
        }
        // deal with dynamic partition columns
        if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
            // rowFields contains non-partitioned columns (tableFields) followed by DP columns
            int rowFieldsOffset = tableFields.size() + (updating(dest) ? 1 : 0);
            for (int dpColIdx = 0; dpColIdx < rowFields.size() - rowFieldsOffset; ++dpColIdx) {
                // create ExprNodeDesc
                ColumnInfo inputColumn = rowFields.get(dpColIdx + rowFieldsOffset);
                TypeInfo inputTypeInfo = inputColumn.getType();
                ExprNodeDesc column = new ExprNodeColumnDesc(inputTypeInfo, inputColumn.getInternalName(), "", true);
                // Cast input column to destination column type if necessary.
                if (conf.getBoolVar(DYNAMICPARTITIONCONVERT)) {
                    if (parts != null && !parts.isEmpty()) {
                        String destPartitionName = dpCtx.getDPColNames().get(dpColIdx);
                        FieldSchema destPartitionFieldSchema = parts.stream().filter(dynamicPartition -> dynamicPartition.getName().equals(destPartitionName)).findFirst().orElse(null);
                        if (destPartitionFieldSchema == null) {
                            throw new IllegalStateException("Partition schema for dynamic partition " + destPartitionName + " not found in DynamicPartitionCtx.");
                        }
                        String partitionType = destPartitionFieldSchema.getType();
                        if (partitionType == null) {
                            throw new IllegalStateException("Couldn't get FieldSchema for partition" + destPartitionFieldSchema.getName());
                        }
                        PrimitiveTypeInfo partitionTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(partitionType);
                        if (!partitionTypeInfo.equals(inputTypeInfo)) {
                            column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, partitionTypeInfo);
                            converted = true;
                        }
                    } else {
                        LOG.warn("Partition schema for dynamic partition " + inputColumn.getAlias() + " (" + inputColumn.getInternalName() + ") not found in DynamicPartitionCtx. " + "This is expected with a CTAS.");
                    }
                }
                expressions.add(column);
            }
        }
    }
    if (converted) {
        // add the select operator
        RowResolver rowResolver = new RowResolver();
        List<String> colNames = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        for (int i = 0; i < expressions.size(); i++) {
            String name = getColumnInternalName(i);
            rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
            colNames.add(name);
            colExprMap.put(name, expressions.get(i));
        }
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
        input.setColumnExprMap(colExprMap);
    }
    return input;
}
Also used : LinkedHashMap(java.util.LinkedHashMap), HashMap(java.util.HashMap), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList(java.util.ArrayList), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc), CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException), StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector), StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), LockException(org.apache.hadoop.hive.ql.lockmgr.LockException), IOException(java.io.IOException), MetaException(org.apache.hadoop.hive.metastore.api.MetaException), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), SerDeException(org.apache.hadoop.hive.serde2.SerDeException), PatternSyntaxException(java.util.regex.PatternSyntaxException), FileNotFoundException(java.io.FileNotFoundException), AccessControlException(java.security.AccessControlException), InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException), SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint), SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), MetadataTypedColumnsetSerDe(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe)
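
The dynamic-partition branch above boils down to a TypeInfo comparison driven by the partition column's declared type name. A tiny sketch of that decision follows; the int and string types are made-up values standing in for the query column and the partition FieldSchema:

import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DynamicPartitionCastSketch {
    public static void main(String[] args) {
        TypeInfo inputTypeInfo = TypeInfoFactory.intTypeInfo; // type flowing in from the query
        String partitionType = "string";                      // from the partition FieldSchema
        PrimitiveTypeInfo partitionTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(partitionType);
        if (!partitionTypeInfo.equals(inputTypeInfo)) {
            // genConversionSelectOperator would wrap the column in a cast here.
            System.out.println("cast needed: " + inputTypeInfo + " -> " + partitionTypeInfo);
        }
    }
}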

Aggregations

Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 27 uses
ArrayList (java.util.ArrayList): 25 uses
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 20 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 19 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 18 uses
IOException (java.io.IOException): 16 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 15 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 14 uses
Properties (java.util.Properties): 12 uses
Path (org.apache.hadoop.fs.Path): 11 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 10 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 10 uses
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 8 uses
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 8 uses
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 8 uses
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 8 uses
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 8 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 8 uses
HashMap (java.util.HashMap): 7 uses
List (java.util.List): 7 uses