Search in sources :

Example 21 with ExprNodeFieldDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.

From the class ColumnPrunerProcCtx, method getNestedColsFromExprNodeDesc.

/**
 * Recursively walks an expression tree and collects the nested column paths
 * it references into {@code paths}.
 *
 * @param desc       the expression node to inspect
 * @param pathToRoot the field-access path accumulated from the root so far
 * @param paths      output list receiving one {@link FieldNode} per referenced column
 */
private static void getNestedColsFromExprNodeDesc(ExprNodeDesc desc, FieldNode pathToRoot, List<FieldNode> paths) {
    if (desc instanceof ExprNodeColumnDesc) {
        // Leaf: a direct column reference terminates the path.
        ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc) desc;
        FieldNode node = new FieldNode(colExpr.getColumn());
        checkListAndMap(colExpr, pathToRoot, node);
        paths.add(node);
        return;
    }
    if (desc instanceof ExprNodeFieldDesc) {
        // A struct field access: record the field name, then descend into the
        // expression producing the struct so the path grows toward the column.
        ExprNodeFieldDesc fieldExpr = (ExprNodeFieldDesc) desc;
        FieldNode node = new FieldNode(fieldExpr.getFieldName());
        checkListAndMap(fieldExpr, pathToRoot, node);
        getNestedColsFromExprNodeDesc(fieldExpr.getDesc(), node, paths);
        return;
    }
    // Any other expression kind: recurse over its children (if any) with the
    // same accumulated path.
    List<ExprNodeDesc> children = desc.getChildren();
    if (children == null) {
        return;
    }
    for (ExprNodeDesc child : children) {
        getNestedColsFromExprNodeDesc(child, pathToRoot, paths);
    }
}
Also used : ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 22 with ExprNodeFieldDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.

From the class StatsUtils, method getColStatisticsFromExpression.

/**
 * Derives a {@link ColStatistics} object for an expression node, based on the
 * statistics of the parent operator. Column references reuse (clone) existing
 * parent column stats; constants, generic functions, column lists and struct
 * field accesses fall through to a default estimate built at the bottom.
 *
 * @param conf
 *          - hive conf
 * @param parentStats
 *          - parent statistics
 * @param end
 *          - expression node to derive statistics for
 * @return column statistics, or null if none can be derived
 */
public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats, ExprNodeDesc end) {
    if (end == null) {
        return null;
    }
    String colName = null;
    String colType = null;
    double avgColSize = 0;
    long countDistincts = 0;
    // NOTE(review): numNulls is never assigned in any branch, so the fallback
    // stats built at the bottom always report 0 nulls — confirm intentional.
    long numNulls = 0;
    ObjectInspector oi = end.getWritableObjectInspector();
    long numRows = parentStats.getNumRows();
    if (end instanceof ExprNodeColumnDesc) {
        // column projection
        ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
        colName = encd.getColumn();
        if (encd.getIsPartitionColOrVirtualCol()) {
            ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName);
            if (colStats != null) {
                /* If statistics for the column already exist use it. */
                return colStats.clone();
            }
            // virtual columns: no stored stats, so assume every row is distinct
            colType = encd.getTypeInfo().getTypeName();
            countDistincts = numRows;
        } else {
            // clone the column stats and return
            ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
            if (result != null) {
                return result.clone();
            }
            return null;
        }
    } else if (end instanceof ExprNodeConstantDesc) {
        return buildColStatForConstant(conf, numRows, (ExprNodeConstantDesc) end);
    } else if (end instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
        colName = engfd.getName();
        colType = engfd.getTypeString();
        // If it is a widening cast, we do not change NDV, min, max
        if (isWideningCast(engfd) && engfd.getChildren().get(0) instanceof ExprNodeColumnDesc) {
            // cast on single column: reuse the child column's stats, only
            // adjusting name, type and average length for the new type
            ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
            if (stats != null) {
                ColStatistics newStats;
                newStats = stats.clone();
                newStats.setColumnName(colName);
                colType = colType.toLowerCase();
                newStats.setColumnType(colType);
                newStats.setAvgColLen(getAvgColLenOf(conf, oi, colType));
                return newStats;
            }
        }
        if (conf.getBoolVar(ConfVars.HIVE_STATS_ESTIMATORS_ENABLE)) {
            // Ask the UDF itself for a stat estimator; it can only be used if
            // stats are available for every child argument.
            Optional<StatEstimatorProvider> sep = engfd.getGenericUDF().adapt(StatEstimatorProvider.class);
            if (sep.isPresent()) {
                StatEstimator se = sep.get().getStatEstimator();
                List<ColStatistics> csList = new ArrayList<ColStatistics>();
                for (ExprNodeDesc child : engfd.getChildren()) {
                    ColStatistics cs = getColStatisticsFromExpression(conf, parentStats, child);
                    if (cs == null) {
                        break;
                    }
                    csList.add(cs);
                }
                if (csList.size() == engfd.getChildren().size()) {
                    Optional<ColStatistics> res = se.estimate(csList);
                    if (res.isPresent()) {
                        ColStatistics newStats = res.get();
                        colType = colType.toLowerCase();
                        newStats.setColumnType(colType);
                        newStats.setColumnName(colName);
                        return newStats;
                    }
                }
            }
        }
        // fallback to default
        countDistincts = getNDVFor(engfd, numRows, parentStats);
    } else if (end instanceof ExprNodeColumnListDesc) {
        // column list
        ExprNodeColumnListDesc encd = (ExprNodeColumnListDesc) end;
        colName = Joiner.on(",").join(encd.getCols());
        colType = serdeConstants.LIST_TYPE_NAME;
        countDistincts = numRows;
    } else if (end instanceof ExprNodeFieldDesc) {
        // field within complex type
        ExprNodeFieldDesc enfd = (ExprNodeFieldDesc) end;
        colName = enfd.getFieldName();
        colType = enfd.getTypeString();
        countDistincts = numRows;
    } else if (end instanceof ExprDynamicParamDesc) {
        // not possible to create a colstats object for a dynamic parameter
        return null;
    } else {
        throw new IllegalArgumentException("not supported expr type " + end.getClass());
    }
    // Shared tail: branches that did not return early land here with colName,
    // colType and countDistincts populated.
    colType = colType.toLowerCase();
    avgColSize = getAvgColLenOf(conf, oi, colType);
    ColStatistics colStats = new ColStatistics(colName, colType);
    colStats.setAvgColLen(avgColSize);
    colStats.setCountDistint(countDistincts);
    colStats.setNumNulls(numNulls);
    return colStats;
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) WritableTimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampLocalTZObjectInspector) StandardConstantListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector) StandardConstantMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 
StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) StandardConstantStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector) StandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) StatEstimator(org.apache.hadoop.hive.ql.stats.estimator.StatEstimator) ExprNodeColumnListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc) ExprDynamicParamDesc(org.apache.hadoop.hive.ql.plan.ExprDynamicParamDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) StatEstimatorProvider(org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 23 with ExprNodeFieldDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.

From the class TestExecDriver, method populateMapRedPlan3.

/**
 * Builds a MapReduce plan that tests reduce with multiple tagged inputs:
 * two map-side reduce sinks (tags 0 and 1) feeding one tagged reduce side
 * that selects a value field and writes it to a file sink.
 *
 * @param src  first source table (tag 0, key/value)
 * @param src2 second source table (tag 1, key/key)
 * @throws SemanticException if plan construction fails
 */
@SuppressWarnings("unchecked")
private void populateMapRedPlan3(Table src, Table src2) throws SemanticException {
    List<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // map-side work
    // Sink for src: key -> key, value -> value, tag 0, 1 reducer.
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("value")), outputColumns, true, Byte.valueOf((byte) 0), 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
    addMapWork(mr, src, "a", op1);
    // Sink for src2: key used for both key and value, tag 1.
    Operator<ReduceSinkDesc> op2 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("key")), outputColumns, true, Byte.valueOf((byte) 1), Integer.MAX_VALUE, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
    addMapWork(mr, src2, "b", op2);
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(Integer.valueOf(1));
    // Tagging is required so the reducer can tell the two inputs apart.
    rWork.setNeedsTagging(true);
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    // Value descriptors are registered per tag, in tag order (0 then 1).
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);
    rWork.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());
    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(TMPDIR + File.separator + "mapredplan3.out"), Utilities.defaultTd, false));
    // Select field "0" out of the VALUE struct and feed it into the file sink.
    Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(Utilities.makeList(new ExprNodeFieldDesc(TypeInfoFactory.stringTypeInfo, new ExprNodeColumnDesc(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo), Utilities.ReduceField.VALUE.toString(), "", false), "0", false)), Utilities.makeList(outputColumns.get(0))), op4);
    rWork.setReducer(op5);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Example 24 with ExprNodeFieldDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.

From the class SemanticAnalyzer, method genAllExprNodeDesc.

/**
 * Generates all of the expression node descriptors for the expression and children of it
 * passed in the arguments. This function uses the row resolver and the metadata information
 * that are passed as arguments to resolve the column names to internal names.
 *
 * @param expr
 *          The expression
 * @param input
 *          The row resolver
 * @param tcCtx
 *          Customized type-checking context
 * @return expression to exprNodeDesc mapping
 * @throws SemanticException Failed to evaluate expression
 */
@SuppressWarnings("nls")
Map<ASTNode, ExprNodeDesc> genAllExprNodeDesc(ASTNode expr, RowResolver input, TypeCheckCtx tcCtx) throws SemanticException {
    // Create the walker and  the rules dispatcher.
    tcCtx.setUnparseTranslator(unparseTranslator);
    Map<ASTNode, ExprNodeDesc> nodeOutputs = ExprNodeTypeCheck.genExprNode(expr, tcCtx);
    ExprNodeDesc desc = nodeOutputs.get(expr);
    if (desc == null) {
        // Type-checking failed for the root expression; build the most
        // specific error message available.
        String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText());
        ColumnInfo colInfo = input.get(null, tableOrCol);
        String errMsg;
        if (colInfo == null && input.getIsExprResolver()) {
            errMsg = ASTErrorUtils.getMsg(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr);
        } else {
            errMsg = tcCtx.getError();
        }
        throw new SemanticException(Optional.ofNullable(errMsg).orElse("Error in parsing "));
    }
    if (desc instanceof ExprNodeColumnListDesc) {
        throw new SemanticException("TOK_ALLCOLREF is not supported in current context");
    }
    if (!unparseTranslator.isEnabled()) {
        // Not creating a view, so no need to track view expansions.
        return nodeOutputs;
    }
    // From here on we record unparse translations so view text can be
    // expanded with fully-qualified column references.
    List<ASTNode> fieldDescList = new ArrayList<>();
    for (Map.Entry<ASTNode, ExprNodeDesc> entry : nodeOutputs.entrySet()) {
        if (!(entry.getValue() instanceof ExprNodeColumnDesc)) {
            // struct<>. field accesses are collected and translated in a
            // second pass below; everything else is skipped.
            if (entry.getValue() instanceof ExprNodeFieldDesc) {
                fieldDescList.add(entry.getKey());
            }
            continue;
        }
        ASTNode node = entry.getKey();
        ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) entry.getValue();
        if ((columnDesc.getTabAlias() == null) || (columnDesc.getTabAlias().length() == 0)) {
            // internal expressions used in the representation of aggregation.
            continue;
        }
        String[] tmp = input.reverseLookup(columnDesc.getColumn());
        // check if outer present && (tmp is null || tmp not null - contains tbl info)
        if (tcCtx.getOuterRR() != null && (tmp == null || (tmp[0] != null && columnDesc.getTabAlias() != null && !tmp[0].equals(columnDesc.getTabAlias())))) {
            tmp = tcCtx.getOuterRR().reverseLookup(columnDesc.getColumn());
        }
        // NOTE(review): tmp can still be null here (no outer RR, or the outer
        // lookup also missed), which would NPE on tmp[0] below — confirm that
        // reverseLookup is guaranteed non-null for columns with a table alias.
        StringBuilder replacementText = new StringBuilder();
        replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
        replacementText.append(".");
        replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
        unparseTranslator.addTranslation(node, replacementText.toString());
    }
    // Second pass: translate the collected struct-field access nodes.
    for (ASTNode node : fieldDescList) {
        Map<ASTNode, String> map = translateFieldDesc(node);
        for (Entry<ASTNode, String> entry : map.entrySet()) {
            unparseTranslator.addTranslation(entry.getKey(), entry.getValue().toLowerCase());
        }
    }
    return nodeOutputs;
}
Also used : ExprNodeColumnListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Aggregations

ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc)24 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)23 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)22 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)15 ArrayList (java.util.ArrayList)14 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)13 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)6 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)6 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)6 Test (org.junit.Test)6 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)5 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)5 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)5 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)4 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)4 ExprNodeDynamicValueDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc)4 BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)4 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)3 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)3 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)3