Example 1 with MetadataTypedColumnsetSerDe

Use of org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe in project hive by apache: the method genConversionSelectOperator of the class SemanticAnalyzer.
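
MetadataTypedColumnsetSerDe models every column as a string, which is why the method below can skip type conversion when a table uses it. A minimal, hypothetical sketch of that behavior (not part of the Hive source; the column names and property value are illustrative):

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class MetadataSerDeSketch {
    public static void main(String[] args) throws Exception {
        Properties tableProps = new Properties();
        // Illustrative column list; this SerDe ignores any declared column types.
        tableProps.setProperty(serdeConstants.LIST_COLUMNS, "id,name,amount");
        MetadataTypedColumnsetSerDe serde = new MetadataTypedColumnsetSerDe();
        SerDeUtils.initializeSerDe(serde, new Configuration(), tableProps, null);
        StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
        // Every field comes back as string, so there is nothing to cast.
        oi.getAllStructFieldRefs().forEach(f ->
                System.out.println(f.getFieldName() + " -> " + f.getFieldObjectInspector().getTypeName()));
    }
}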

/**
   * Generate the conversion SelectOperator that converts the columns into the
   * types that are expected by the table_desc.
   */
Operator genConversionSelectOperator(String dest, QB qb, Operator input, TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException {
    StructObjectInspector oi = null;
    try {
        Deserializer deserializer = table_desc.getDeserializerClass().newInstance();
        SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null);
        oi = (StructObjectInspector) deserializer.getObjectInspector();
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    // Check column number
    List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
    boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
    ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
    int inColumnCnt = rowFields.size();
    int outColumnCnt = tableFields.size();
    if (dynPart && dpCtx != null) {
        outColumnCnt += dpCtx.getNumDPCols();
    }
    // The number of input columns and output columns should match for a regular (non-update, non-delete) query
    if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
        String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns.";
        throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
    }
    // Check column types
    boolean converted = false;
    int columnNumber = tableFields.size();
    ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
    // MetadataTypedColumnsetSerDe does not need type conversions because it
    // does the conversion to String by itself.
    boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class);
    boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class);
    if (!isMetaDataSerDe && !deleting(dest)) {
        // offset by 1 so that we don't try to convert the ROW__ID
        if (updating(dest)) {
            expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true));
        }
        // this loop only deals with non-partition columns; partition columns are handled next
        for (int i = 0; i < columnNumber; i++) {
            int rowFieldsOffset = updating(dest) ? i + 1 : i;
            ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
            TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
            TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol());
            // LazySimpleSerDe can convert any type to the string type using its JSON format,
            // so no cast is needed when the target column is a string.
            if (!tableFieldTypeInfo.equals(rowFieldTypeInfo) && !(isLazySimpleSerDe && tableFieldTypeInfo.getCategory().equals(Category.PRIMITIVE) && tableFieldTypeInfo.equals(TypeInfoFactory.stringTypeInfo))) {
                // need to do some conversions here
                converted = true;
                if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
                    // cannot convert to complex types
                    column = null;
                } else {
                    column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
                }
                if (column == null) {
                    String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
                    throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
                }
            }
            expressions.add(column);
        }
    }
    // deal with dynamic partition columns (open question: should the ExprNodeDesc type be converted to String?)
    if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
        // DP columns start at index tableFields.size()
        for (int i = tableFields.size() + (updating(dest) ? 1 : 0); i < rowFields.size(); ++i) {
            TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", true);
            expressions.add(column);
        }
    // converted = true; // [TODO]: should we check & convert type to String and set it to true?
    }
    if (converted) {
        // add the select operator
        RowResolver rowResolver = new RowResolver();
        ArrayList<String> colNames = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        for (int i = 0; i < expressions.size(); i++) {
            String name = getColumnInternalName(i);
            rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
            colNames.add(name);
            colExprMap.put(name, expressions.get(i));
        }
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
        input.setColumnExprMap(colExprMap);
    }
    return input;
}
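
The heart of the method is the per-column decision of whether a cast is needed. A self-contained sketch of just that check (the helper needsConversion is my name, not Hive's):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConversionCheckSketch {

    // Mirrors the condition above: identical types need no cast, and a string
    // target column served by LazySimpleSerDe accepts any row type as-is.
    static boolean needsConversion(TypeInfo tableType, TypeInfo rowType, boolean isLazySimpleSerDe) {
        if (tableType.equals(rowType)) {
            return false;
        }
        return !(isLazySimpleSerDe
                && tableType.getCategory() == Category.PRIMITIVE
                && tableType.equals(TypeInfoFactory.stringTypeInfo));
    }

    public static void main(String[] args) {
        // An int table column fed by a string expression requires a cast.
        System.out.println(needsConversion(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo, false)); // true
        // A string column under LazySimpleSerDe does not: the SerDe stringifies values itself.
        System.out.println(needsConversion(
                TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, true)); // false
    }
}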

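When a conversion is required, the method above delegates to ParseUtils.createConversionCast, which wraps the column expression in the matching cast UDF (conversions to complex types are rejected before that call by nulling the column). A minimal sketch of a single cast, using a hypothetical query column "_col0" of type string feeding an int table column:

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConversionCastSketch {
    public static void main(String[] args) throws SemanticException {
        // Hypothetical query column "_col0" of type string.
        ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false);
        // The target table column is int, so wrap the column in an int cast.
        ExprNodeDesc cast = ParseUtils.createConversionCast(column, TypeInfoFactory.intTypeInfo);
        System.out.println(cast.getExprString()); // prints something like UDFToInteger(_col0)
    }
}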