
Example 1 with PTFExpressionDef

Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project hive by apache.

From the class SemanticAnalyzer, the method buildPTFReduceSinkDetails:

/**
 * Construct the data structures containing ExprNodeDesc for partition
 * columns and order columns. Use the input definition to construct the list
 * of output columns for the ReduceSinkOperator.
 *
 * @throws SemanticException
 */
void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, RowResolver inputRR, ArrayList<ExprNodeDesc> partCols, ArrayList<ExprNodeDesc> orderCols, StringBuilder orderString, StringBuilder nullOrderString) throws SemanticException {
    List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
    for (PTFExpressionDef colDef : partColList) {
        ExprNodeDesc exprNode = colDef.getExprNode();
        if (ExprNodeDescUtils.indexOf(exprNode, partCols) < 0) {
            partCols.add(exprNode);
            orderCols.add(exprNode);
            orderString.append('+');
            nullOrderString.append('a');
        }
    }
    /*
     * Order columns are used as key columns for constructing
     * the ReduceSinkOperator.
     * Since we do not explicitly add these to outputColumnNames,
     * we need to set includeKeyCols = false while creating the
     * ReduceSinkDesc.
     */
    List<OrderExpressionDef> orderColList = tabDef.getOrder().getExpressions();
    for (int i = 0; i < orderColList.size(); i++) {
        OrderExpressionDef colDef = orderColList.get(i);
        char orderChar = colDef.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-';
        char nullOrderChar = colDef.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z';
        int index = ExprNodeDescUtils.indexOf(colDef.getExprNode(), orderCols);
        if (index >= 0) {
            orderString.setCharAt(index, orderChar);
            nullOrderString.setCharAt(index, nullOrderChar);
            continue;
        }
        orderCols.add(colDef.getExprNode());
        orderString.append(orderChar);
        nullOrderString.append(nullOrderChar);
    }
}
Also used: PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)
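
The two StringBuilders act as compact per-key-column encodings: one character per ReduceSink key column, '+'/'-' for ascending/descending in orderString and 'a'/'z' for NULLS FIRST/NULLS LAST in nullOrderString. A minimal, Hive-free sketch of that encoding (the column names and class name are illustrative, not from Hive):

import java.util.ArrayList;
import java.util.List;

public class SortKeyEncodingSketch {
    public static void main(String[] args) {
        List<String> keyCols = new ArrayList<>();
        StringBuilder order = new StringBuilder();
        StringBuilder nullOrder = new StringBuilder();
        // PARTITION BY dept: partition keys default to ascending, nulls first.
        keyCols.add("dept");
        order.append('+');
        nullOrder.append('a');
        // ORDER BY salary DESC NULLS LAST: a new key column appends '-' and 'z'.
        keyCols.add("salary");
        order.append('-');
        nullOrder.append('z');
        // keyCols=[dept, salary], order="+-", nullOrder="az"
        System.out.println(keyCols + " order=" + order + " nullOrder=" + nullOrder);
    }
}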

Example 2 with PTFExpressionDef

Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project hive by apache.

From the class SemanticAnalyzer, the method buildPTFReduceSinkDetails:

/**
 * Construct the data structures containing ExprNodeDesc for partition
 * columns and order columns. Use the input definition to construct the list
 * of output columns for the ReduceSinkOperator.
 */
private void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, List<ExprNodeDesc> partCols, List<ExprNodeDesc> orderCols, StringBuilder orderString, StringBuilder nullOrderString) {
    List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
    for (PTFExpressionDef colDef : partColList) {
        ExprNodeDesc exprNode = colDef.getExprNode();
        if (ExprNodeDescUtils.indexOf(exprNode, partCols) < 0) {
            partCols.add(exprNode);
            orderCols.add(exprNode);
            orderString.append('+');
            nullOrderString.append('a');
        }
    }
    /*
     * Order columns are used as key columns for constructing
     * the ReduceSinkOperator.
     * Since we do not explicitly add these to outputColumnNames,
     * we need to set includeKeyCols = false while creating the
     * ReduceSinkDesc.
     */
    List<OrderExpressionDef> orderColList = tabDef.getOrder().getExpressions();
    for (OrderExpressionDef colDef : orderColList) {
        char orderChar = colDef.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-';
        char nullOrderChar = colDef.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z';
        int index = ExprNodeDescUtils.indexOf(colDef.getExprNode(), orderCols);
        if (index >= 0) {
            orderString.setCharAt(index, orderChar);
            nullOrderString.setCharAt(index, nullOrderChar);
            continue;
        }
        orderCols.add(colDef.getExprNode());
        orderString.append(orderChar);
        nullOrderString.append(nullOrderChar);
    }
}
Also used: PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)
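
Note the index >= 0 branch: when an ORDER BY column was already added as a key by the PARTITION BY loop, its direction characters are overwritten in place rather than the column being appended a second time. A small sketch of just that branch (names are illustrative):

import java.util.ArrayList;
import java.util.List;

public class OrderOverwriteSketch {
    public static void main(String[] args) {
        // State after the PARTITION BY loop: one key column, ascending, nulls first.
        List<String> keyCols = new ArrayList<>();
        keyCols.add("dept");
        StringBuilder order = new StringBuilder("+");
        StringBuilder nullOrder = new StringBuilder("a");
        // ORDER BY dept DESC NULLS LAST: the column is already a key,
        // so overwrite its characters instead of appending a duplicate.
        int index = keyCols.indexOf("dept");
        if (index >= 0) {
            order.setCharAt(index, '-');
            nullOrder.setCharAt(index, 'z');
        }
        // keyCols=[dept], order="-", nullOrder="z"
        System.out.println(keyCols + " order=" + order + " nullOrder=" + nullOrder);
    }
}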

Example 3 with PTFExpressionDef

Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project hive by apache.

From the class Vectorizer, the method createVectorPTFDesc:

/*
 * Update the VectorPTFDesc with data that is used during validation and that doesn't rely on
 * VectorizationContext to look up column names, etc.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc, int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef;
    List<WindowFunctionDef> windowsFunctions = windowTableFunctionDef.getWindowFunctions();
    final int functionCount = windowsFunctions.size();
    List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
    final int outputSize = outputSignature.size();
    /*
     * Output columns.
     */
    TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();
    DataTypePhysicalVariation[] reducerBatchDataTypePhysicalVariations = vContext.getAllDataTypePhysicalVariations();
    // Evaluator results are first.
    String[] outputColumnNames = new String[outputSize];
    TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
    DataTypePhysicalVariation[] outputDataTypePhysicalVariations = new DataTypePhysicalVariation[outputSize];
    for (int i = 0; i < functionCount; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        TypeInfo typeInfo = colInfo.getType();
        outputColumnNames[i] = colInfo.getInternalName();
        outputTypeInfos[i] = typeInfo;
        outputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
    }
    // Followed by key and non-key input columns (some may be missing).
    for (int i = functionCount; i < outputSize; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        outputColumnNames[i] = colInfo.getInternalName();
        outputTypeInfos[i] = colInfo.getType();
        outputDataTypePhysicalVariations[i] = reducerBatchDataTypePhysicalVariations[i - functionCount];
    }
    List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
    final int partitionKeyCount = partitionExpressions.size();
    ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);
    List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
    final int orderKeyCount = orderExpressions.size();
    ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);
    // When distinct PARTITION BY and ORDER BY clauses are present, the partition
    // expressions will differ from the order expressions; otherwise there are
    // only order expressions.
    boolean isPartitionOrderBy = false;
    if (partitionKeyCount != orderKeyCount) {
        // Obviously different expressions.
        isPartitionOrderBy = true;
    } else {
        // Check each ExprNodeDesc.
        for (int i = 0; i < partitionKeyCount; i++) {
            final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper = new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[i]);
            final ExprNodeDescEqualityWrapper orderExprEqualityWrapper = new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[i]);
            if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) {
                isPartitionOrderBy = true;
                break;
            }
        }
    }
    String[] evaluatorFunctionNames = new String[functionCount];
    boolean[] evaluatorsAreDistinct = new boolean[functionCount];
    WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = (List<ExprNodeDesc>[]) new List<?>[functionCount];
    fillInPTFEvaluators(windowsFunctions, evaluatorFunctionNames, evaluatorsAreDistinct, evaluatorWindowFrameDefs, evaluatorInputExprNodeDescLists);
    vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos, reducerBatchDataTypePhysicalVariations);
    vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
    vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
    vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
    vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
    vectorPTFDesc.setEvaluatorsAreDistinct(evaluatorsAreDistinct);
    vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
    vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
    vectorPTFDesc.setOutputColumnNames(outputColumnNames);
    vectorPTFDesc.setOutputTypeInfos(outputTypeInfos, outputDataTypePhysicalVariations);
    vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
Also used: PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeDescEqualityWrapper(org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)
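
The element-wise comparison above relies on ExprNodeDescEqualityWrapper, which gives ExprNodeDesc value-equality semantics suitable for equals checks. A minimal sketch of that comparison for a single pair of keys (the table alias and column name are made up for illustration):

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionOrderCompareSketch {
    public static void main(String[] args) {
        // Two references to the same column: the wrappers compare equal,
        // so this pair alone would not force isPartitionOrderBy = true.
        ExprNodeDesc partitionKey =
                new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "dept", "t", false);
        ExprNodeDesc orderKey =
                new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "dept", "t", false);
        boolean equal = new ExprNodeDescEqualityWrapper(partitionKey)
                .equals(new ExprNodeDescEqualityWrapper(orderKey));
        System.out.println("partition key equals order key: " + equal); // true
    }
}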

Example 4 with PTFExpressionDef

Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project hive by apache.

From the class PTFDeserializer, the method initializeWindowing:

public void initializeWindowing(WindowTableFunctionDef def) throws HiveException {
    ShapeDetails inpShape = def.getInput().getOutputShape();
    /*
     * 1. Set up the resolver and make connections.
     */
    TableFunctionEvaluator tEval = def.getTFunction();
    WindowingTableFunctionResolver tResolver = (WindowingTableFunctionResolver) constructResolver(def.getResolverClassName());
    tResolver.initialize(hConf, ptfDesc, def, tEval);
    /*
     * 2. Initialize the window functions.
     */
    for (WindowFunctionDef wFnDef : def.getWindowFunctions()) {
        if (wFnDef.getArgs() != null) {
            for (PTFExpressionDef arg : wFnDef.getArgs()) {
                initialize(arg, inpShape);
            }
        }
        if (wFnDef.getWindowFrame() != null) {
            WindowFrameDef wFrmDef = wFnDef.getWindowFrame();
            initialize(wFrmDef, inpShape);
        }
        setupWdwFnEvaluator(wFnDef);
    }
    ArrayList<String> aliases = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    for (WindowFunctionDef wFnDef : def.getWindowFunctions()) {
        aliases.add(wFnDef.getAlias());
        if (wFnDef.isPivotResult()) {
            fieldOIs.add(((ListObjectInspector) wFnDef.getOI()).getListElementObjectInspector());
        } else {
            fieldOIs.add(wFnDef.getOI());
        }
    }
    PTFDeserializer.addInputColumnsToList(inpShape, aliases, fieldOIs);
    StructObjectInspector wdwOutOI = ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
    tResolver.setWdwProcessingOutputOI(wdwOutOI);
    initialize(def.getOutputShape(), wdwOutOI);
    tResolver.initializeOutputOI();
}
Also used: WindowingTableFunctionResolver(org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) TableFunctionEvaluator(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) ArrayList(java.util.ArrayList) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) ShapeDetails(org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
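
The output shape is a standard struct ObjectInspector built from parallel lists of aliases and field inspectors, window-function outputs first, then the pass-through input columns. A minimal sketch of that construction using primitive inspectors (the aliases are invented for illustration):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class WindowOutputShapeSketch {
    public static void main(String[] args) {
        List<String> aliases = new ArrayList<>();
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        // Window-function output first (e.g. a rank() alias) ...
        aliases.add("rank_window_0");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        // ... followed by a pass-through input column.
        aliases.add("name");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StructObjectInspector wdwOutOI =
                ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
        System.out.println(wdwOutOI.getTypeName()); // struct<rank_window_0:int,name:string>
    }
}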

Example 5 with PTFExpressionDef

Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project hive by apache.

From the class PTFDeserializer, the method setupWdwFnEvaluator:

static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException {
    List<PTFExpressionDef> args = def.getArgs();
    List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
    ObjectInspector[] funcArgOIs = null;
    if (args != null) {
        for (PTFExpressionDef arg : args) {
            argOIs.add(arg.getOI());
        }
        funcArgOIs = argOIs.toArray(new ObjectInspector[0]);
    }
    GenericUDAFEvaluator wFnEval = def.getWFnEval();
    ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, funcArgOIs);
    def.setWFnEval(wFnEval);
    def.setOI(OI);
}
Also used: ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef)
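
Mode.COMPLETE tells the evaluator it will see original rows and must produce the final result in a single pass, which matches how the deserializer re-initializes window functions. A hedged sketch of the same init call against a registry-resolved aggregate (the choice of "sum" over a single bigint argument is illustrative, not from the source):

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class WindowEvaluatorInitSketch {
    public static void main(String[] args) throws Exception {
        // One bigint argument, e.g. sum(salary).
        List<ObjectInspector> argOIs = Collections.singletonList(
                (ObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector);
        GenericUDAFEvaluator eval =
                FunctionRegistry.getGenericUDAFEvaluator("sum", argOIs, false, false);
        // COMPLETE: raw input in, final aggregate out, as in setupWdwFnEvaluator.
        ObjectInspector outOI = eval.init(GenericUDAFEvaluator.Mode.COMPLETE,
                argOIs.toArray(new ObjectInspector[0]));
        System.out.println("output type: " + outOI.getTypeName());
    }
}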

Aggregations

PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef): 26
OrderExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef): 12
ArrayList (java.util.ArrayList): 7
Test (org.junit.Test): 7
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 6
WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef): 6
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 6
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 5
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 5
ShapeDetails (org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails): 4
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 4
WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef): 3
GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator): 3
TableFunctionEvaluator (org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator): 3
WindowingTableFunctionResolver (org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver): 3
Timestamp (org.apache.hadoop.hive.common.type.Timestamp): 2
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 2
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 2
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 2
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 2