Example 71 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class FunctionRegistry method getCommonClassForUnionAll.

/**
 * Find a common type for union-all operator. Only the common types for the same
 * type group will resolve to a common type. No implicit conversion across different
 * type groups will be done.
 */
public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
    if (a.equals(b)) {
        return a;
    }
    if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
        return null;
    }
    PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory();
    PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory();
    if (pcA == pcB) {
        // Same primitive category but different qualifiers.
        return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, pcA);
    }
    PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
    PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
    // untyped nulls
    if (pgA == PrimitiveGrouping.VOID_GROUP) {
        return b;
    }
    if (pgB == PrimitiveGrouping.VOID_GROUP) {
        return a;
    }
    if (pgA != pgB) {
        return null;
    }
    switch(pgA) {
        case STRING_GROUP:
            return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, PrimitiveCategory.STRING);
        case NUMERIC_GROUP:
            return TypeInfoUtils.implicitConvertible(a, b) ? b : a;
        case DATE_GROUP:
            return TypeInfoFactory.timestampTypeInfo;
        default:
            return null;
    }
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveGrouping(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping)
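
To illustrate the grouping rules, a minimal usage sketch (the class name and the chosen type pairs are hypothetical; FunctionRegistry.getCommonClassForUnionAll and the TypeInfoFactory constants are the APIs used above):

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionAllTypeSketch {
    public static void main(String[] args) {
        // Same numeric group: int is implicitly convertible to bigint, so bigint wins.
        TypeInfo numeric = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo);
        // Same date group: resolves to timestamp per the DATE_GROUP case above.
        TypeInfo temporal = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo);
        // Untyped NULL (void group) adopts the other side's type.
        TypeInfo fromNull = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.voidTypeInfo, TypeInfoFactory.stringTypeInfo);
        // Different type groups: no implicit conversion, so null means "no common type".
        TypeInfo none = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
        System.out.println(numeric + ", " + temporal + ", " + fromNull + ", " + none);
    }
}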

Example 72 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class Vectorizer method validatePTFOperator.

private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    if (!isPtfVectorizationEnabled) {
        setNodeIssue("Vectorization of PTF is not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
        return false;
    }
    PTFDesc ptfDesc = (PTFDesc) op.getConf();
    boolean isMapSide = ptfDesc.isMapSide();
    if (isMapSide) {
        setOperatorIssue("PTF Mapper not supported");
        return false;
    }
    List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
    if (ptfParents != null && ptfParents.size() > 0) {
        Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
        if (!(ptfParent instanceof ReduceSinkOperator)) {
            boolean isReduceShufflePtf = false;
            if (ptfParent instanceof SelectOperator) {
                ptfParents = ptfParent.getParentOperators();
                if (ptfParents == null || ptfParents.size() == 0) {
                    isReduceShufflePtf = true;
                } else {
                    ptfParent = ptfParent.getParentOperators().get(0);
                    isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
                }
            }
            if (!isReduceShufflePtf) {
                setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
                return false;
            }
        }
    }
    boolean forNoop = ptfDesc.forNoop();
    if (forNoop) {
        setOperatorIssue("NOOP not supported");
        return false;
    }
    boolean forWindowing = ptfDesc.forWindowing();
    if (!forWindowing) {
        setOperatorIssue("Windowing required");
        return false;
    }
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
    if (!isWindowTableFunctionDef) {
        setOperatorIssue("Must be a WindowTableFunctionDef");
        return false;
    }
    try {
        createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
    } catch (HiveException e) {
        setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
        return false;
    }
    // Output columns ok?
    String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
    TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
    final int outputCount = outputColumnNames.length;
    for (int i = 0; i < outputCount; i++) {
        String typeName = outputTypeInfos[i].getTypeName();
        boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false);
        if (!ret) {
            setExpressionIssue("PTF Output Columns", "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
            return false;
        }
    }
    boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
    String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
    final int count = evaluatorFunctionNames.length;
    WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
    for (int i = 0; i < count; i++) {
        String functionName = evaluatorFunctionNames[i];
        SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
        if (supportedFunctionType == null) {
            setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
            return false;
        }
        WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
        if (!windowFrameDef.isStartUnbounded()) {
            setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
            return false;
        }
        switch(windowFrameDef.getWindowType()) {
            case RANGE:
                if (!windowFrameDef.getEnd().isCurrentRow()) {
                    setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
                    return false;
                }
                break;
            case ROWS:
                if (!windowFrameDef.isEndUnbounded()) {
                    setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type");
                    return false;
                }
                break;
            default:
                throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
        }
        List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
        if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
            setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
            return false;
        }
        if (exprNodeDescList != null) {
            ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
            if (containsLeadLag(exprNodeDesc)) {
                setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
                return false;
            }
            if (supportedFunctionType != SupportedFunctionType.COUNT && supportedFunctionType != SupportedFunctionType.DENSE_RANK && supportedFunctionType != SupportedFunctionType.RANK) {
                // COUNT, DENSE_RANK, and RANK do not care about column types.  The rest do.
                TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
                Category category = typeInfo.getCategory();
                boolean isSupportedType;
                if (category != Category.PRIMITIVE) {
                    isSupportedType = false;
                } else {
                    ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
                    switch(colVecType) {
                        case LONG:
                        case DOUBLE:
                        case DECIMAL:
                            isSupportedType = true;
                            break;
                        default:
                            isSupportedType = false;
                            break;
                    }
                }
                if (!isSupportedType) {
                    setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorReduceSinkObjectHashOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorReduceSinkEmptyKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator) VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) 
VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
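
As a side note, a small hypothetical sketch of the supportedFunctionsMap lookup that the validation above performs (the class name and the sample function names are illustrative; whether a given name is present depends on the Hive version):

import org.apache.hadoop.hive.ql.plan.VectorPTFDesc;
import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType;

public class PtfFunctionCheckSketch {
    public static void main(String[] args) {
        // Functions missing from the map make validatePTFOperator reject the operator.
        for (String name : new String[] { "rank", "row_number", "ntile" }) {
            SupportedFunctionType type = VectorPTFDesc.supportedFunctionsMap.get(name);
            System.out.println(name + " -> "
                    + (type != null ? type : "not in " + VectorPTFDesc.supportedFunctionNames));
        }
    }
}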

Example 73 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class Vectorizer method canSpecializeMapJoin.

private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinDesc vectorDesc) throws HiveException {
    Preconditions.checkState(op instanceof MapJoinOperator);
    VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
    boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean oneMapJoinCondition = (desc.getConds().length == 1);
    boolean hasNullSafes = onExpressionHasNullSafes(desc);
    byte posBigTable = (byte) desc.getPosBigTable();
    // Since we want to display all the met and not met conditions in EXPLAIN, we determine all
    // information first....
    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
    boolean outerJoinHasNoKeys = (!desc.isNoOuterJoin() && keyDesc.size() == 0);
    // For now, we don't support joins on or using DECIMAL_64.
    VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyDesc);
    final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
    // Assume.
    boolean supportsKeyTypes = true;
    HashSet<String> notSupportedKeyTypes = new HashSet<String>();
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
    String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
    TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
    ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] slimmedBigTableKeyExpressions;
    for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
        VectorExpression ve = allBigTableKeyExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableKeyExpressionsList.add(ve);
        }
        bigTableKeyColumnMap[i] = ve.getOutputColumnNum();
        ExprNodeDesc exprNode = keyDesc.get(i);
        bigTableKeyColumnNames[i] = exprNode.toString();
        TypeInfo typeInfo = exprNode.getTypeInfo();
        // same check used in HashTableLoader.
        if (!MapJoinKey.isSupportedField(typeInfo)) {
            supportsKeyTypes = false;
            Category category = typeInfo.getCategory();
            notSupportedKeyTypes.add((category != Category.PRIMITIVE ? category.toString() : ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
        }
        bigTableKeyTypeInfos[i] = typeInfo;
    }
    if (bigTableKeyExpressionsList.size() == 0) {
        slimmedBigTableKeyExpressions = null;
    } else {
        slimmedBigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
    // For now, we don't support joins on or using DECIMAL_64.
    VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressionsUpConvertDecimal64(bigTableExprs);
    boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
    // Especially since LLAP is prone to turn it off in the MapJoinDesc in later
    // physical optimizer stages...
    boolean isHybridHashJoin = desc.isHybridHashJoin();
    /*
     * Populate vectorMapJoinInfo.
     */
    /*
     * Similarly, we need a mapping since a value expression can be a calculation and the value
     * will go into a scratch column.
     */
    int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
    String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
    TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
    ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] slimmedBigTableValueExpressions;
    for (int i = 0; i < bigTableValueColumnMap.length; i++) {
        VectorExpression ve = allBigTableValueExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableValueExpressionsList.add(ve);
        }
        bigTableValueColumnMap[i] = ve.getOutputColumnNum();
        ExprNodeDesc exprNode = bigTableExprs.get(i);
        bigTableValueColumnNames[i] = exprNode.toString();
        bigTableValueTypeInfos[i] = exprNode.getTypeInfo();
    }
    if (bigTableValueExpressionsList.size() == 0) {
        slimmedBigTableValueExpressions = null;
    } else {
        slimmedBigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
    vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
    vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
    vectorDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions);
    vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
    vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
    vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
    vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions);
    /*
     * Small table information.
     */
    VectorColumnOutputMapping bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");
    VectorColumnOutputMapping bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping");
    // The order of the fields in the LazyBinary small table value must be used, so
    // we use the source ordering flavor for the mapping.
    VectorColumnSourceMapping smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping");
    Byte[] order = desc.getTagOrder();
    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
    boolean isOuterJoin = !desc.getNoOuterJoin();
    /*
     * Gather up big and small table output result information from the MapJoinDesc.
     */
    List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
    int bigTableRetainSize = bigTableRetainList.size();
    int[] smallTableIndices;
    int smallTableIndicesSize;
    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
    if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
        smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
        smallTableIndicesSize = smallTableIndices.length;
    } else {
        smallTableIndices = null;
        smallTableIndicesSize = 0;
    }
    List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
    int smallTableRetainSize = smallTableRetainList.size();
    int smallTableResultSize = 0;
    if (smallTableIndicesSize > 0) {
        smallTableResultSize = smallTableIndicesSize;
    } else if (smallTableRetainSize > 0) {
        smallTableResultSize = smallTableRetainSize;
    }
    /*
     * Determine the big table retained mapping first so we can optimize out (with
     * projection) copying inner join big table keys in the subsequent small table results section.
     */
    // We use a mapping object here so we can build the projection in any order and
    // get the ordered by 0 to n-1 output columns at the end.
    // 
    // Also, to avoid copying a big table key into the small table result area for inner joins,
    // we reference it with the projection so there can be duplicate output columns
    // in the projection.
    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
    int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
    for (int i = 0; i < bigTableRetainSize; i++) {
        // Since bigTableValueExpressions may do a calculation and produce a scratch column, we
        // need to map to the right batch column.
        int retainColumn = bigTableRetainList.get(i);
        int batchColumnIndex = bigTableValueColumnMap[retainColumn];
        TypeInfo typeInfo = bigTableValueTypeInfos[i];
        // With this map we project the big table batch to make it look like an output batch.
        projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
        // Collect columns we copy from the big table batch to the overflow batch.
        if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) {
            // Tolerate repeated use of a big table column.
            bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
        }
        nextOutputColumn++;
    }
    /*
     * Now determine the small table results.
     */
    boolean smallTableExprVectorizes = true;
    int firstSmallTableOutputColumn;
    firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
    int smallTableOutputCount = 0;
    nextOutputColumn = firstSmallTableOutputColumn;
    // Small table indices have more information (i.e. keys) than the retain list, so use them if they exist...
    String[] bigTableRetainedNames;
    if (smallTableIndicesSize > 0) {
        smallTableOutputCount = smallTableIndicesSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableIndicesSize; i++) {
            if (smallTableIndices[i] >= 0) {
                // Zero and above numbers indicate a big table key is needed for
                // small table result "area".
                int keyIndex = smallTableIndices[i];
                // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
                // need to map the right column.
                int batchKeyColumn = bigTableKeyColumnMap[keyIndex];
                bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex];
                TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
                if (!isOuterJoin) {
                    // Optimize inner join keys of small table results.
                    // Project the big table key into the small table result "area".
                    projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo);
                    if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) {
                        // If necessary, copy the big table key into the overflow batch's small table
                        // result "area".
                        bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo);
                    }
                } else {
                    // For outer joins, since the small table key can be null when there is no match,
                    // we must have a physical (scratch) column for those keys.  We cannot use the
                    // projection optimization used by inner joins above.
                    int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                    projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                    bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                    bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                }
            } else {
                // Negative numbers indicate a column to be (deserialize) read from the small table's
                // LazyBinary value row.
                int smallTableValueIndex = -smallTableIndices[i] - 1;
                ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
                if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                    clearNotVectorizedReason();
                    smallTableExprVectorizes = false;
                }
                bigTableRetainedNames[i] = smallTableExprNode.toString();
                TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
                // Make a new big table scratch column for the small table value.
                int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            }
            nextOutputColumn++;
        }
    } else if (smallTableRetainSize > 0) {
        smallTableOutputCount = smallTableRetainSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableRetainSize; i++) {
            int smallTableValueIndex = smallTableRetainList.get(i);
            ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
            if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                clearNotVectorizedReason();
                smallTableExprVectorizes = false;
            }
            bigTableRetainedNames[i] = smallTableExprNode.toString();
            // Make a new big table scratch column for the small table value.
            TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
            int scratchColumn = vContext.allocateScratchColumn(typeInfo);
            projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
            smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            nextOutputColumn++;
        }
    } else {
        bigTableRetainedNames = new String[0];
    }
    boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    // Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    vectorDesc.setUseOptimizedTable(useOptimizedTable);
    vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
    vectorDesc.setHasNullSafes(hasNullSafes);
    vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
    vectorDesc.setOuterJoinHasNoKeys(outerJoinHasNoKeys);
    vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
    vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
    vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
    if (!supportsKeyTypes) {
        vectorDesc.setNotSupportedKeyTypes(new ArrayList(notSupportedKeyTypes));
    }
    // Check common conditions for both Optimized and Fast Hash Tables.
    // Assume.
    boolean result = true;
    if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes || outerJoinHasNoKeys) {
        result = false;
    }
    if (!isFastHashTableEnabled) {
        // Check optimized-only hash table restrictions.
        if (!supportsKeyTypes) {
            result = false;
        }
    } else {
        if (isHybridHashJoin) {
            result = false;
        }
    }
    // Convert dynamic arrays and maps to simple arrays.
    bigTableRetainedMapping.finalize();
    bigTableOuterKeyMapping.finalize();
    smallTableMapping.finalize();
    vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping);
    vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping);
    vectorMapJoinInfo.setSmallTableMapping(smallTableMapping);
    projectionMapping.finalize();
    // Verify we added an entry for each output.
    assert projectionMapping.isSourceSequenceGood();
    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
    return result;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ArrayList(java.util.ArrayList) VectorColumnOutputMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VectorColumnSourceMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
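
For reference, a hypothetical sketch listing the configuration switches this method consults (the class name is illustrative; the ConfVars entries are the ones read above):

import org.apache.hadoop.hive.conf.HiveConf;

public class MapJoinConfSketch {
    public static void main(String[] args) {
        HiveConf hiveConf = new HiveConf();
        // Native vectorized map join must be enabled at all.
        boolean nativeEnabled = HiveConf.getBoolVar(hiveConf,
                HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
        // Optimized vs. fast hash table selection.
        boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf,
                HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
        boolean fastHashTableEnabled = HiveConf.getBoolVar(hiveConf,
                HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
        // The execution engine also gates specialization (Tez/Spark only).
        String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
        System.out.println(nativeEnabled + " " + useOptimizedTable + " "
                + fastHashTableEnabled + " " + engine);
    }
}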

Example 74 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class Vectorizer method validateStructInExpression.

private boolean validateStructInExpression(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode) {
    for (ExprNodeDesc d : desc.getChildren()) {
        TypeInfo typeInfo = d.getTypeInfo();
        if (typeInfo.getCategory() != Category.STRUCT) {
            return false;
        }
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
        ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
        final int fieldCount = fieldTypeInfos.size();
        for (int f = 0; f < fieldCount; f++) {
            TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
            Category category = fieldTypeInfo.getCategory();
            if (category != Category.PRIMITIVE) {
                setExpressionIssue(expressionTitle, "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName());
                return false;
            }
            PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
            InConstantType inConstantType = VectorizationContext.getInConstantTypeFromPrimitiveCategory(fieldPrimitiveTypeInfo.getPrimitiveCategory());
            // For now, limit the data types we support for Vectorized Struct IN().
            if (inConstantType != InConstantType.INT_FAMILY && inConstantType != InConstantType.FLOAT_FAMILY && inConstantType != InConstantType.STRING_FAMILY) {
                setExpressionIssue(expressionTitle, "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName());
                return false;
            }
        }
    }
    return true;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
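
To make the field-type restriction concrete, a hypothetical sketch (class and method names are mine) that classifies primitive categories the same way the loop above does, via VectorizationContext.getInConstantTypeFromPrimitiveCategory:

import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

public class StructInFieldCheckSketch {
    // Mirrors validateStructInExpression: only the int, float, and string families pass.
    static boolean fieldTypeSupported(PrimitiveCategory pc) {
        InConstantType inConstantType = VectorizationContext.getInConstantTypeFromPrimitiveCategory(pc);
        return inConstantType == InConstantType.INT_FAMILY
                || inConstantType == InConstantType.FLOAT_FAMILY
                || inConstantType == InConstantType.STRING_FAMILY;
    }

    public static void main(String[] args) {
        System.out.println(fieldTypeSupported(PrimitiveCategory.INT));      // expected: true
        System.out.println(fieldTypeSupported(PrimitiveCategory.STRING));   // expected: true
        System.out.println(fieldTypeSupported(PrimitiveCategory.DECIMAL));  // presumably false
    }
}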

Example 75 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class BinarySortableDeserializeRead method readPrimitive.

private boolean readPrimitive(Field field) throws IOException {
    final int fieldIndex = root.index;
    field.start = inputByteBuffer.tell();
    /*
     * We have a field and are positioned to it.  Read it.
     */
    switch(field.primitiveCategory) {
        case BOOLEAN:
            currentBoolean = (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) == 2);
            return true;
        case BYTE:
            currentByte = (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80);
            return true;
        case SHORT:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                int v = inputByteBuffer.read(invert) ^ 0x80;
                v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
                currentShort = (short) v;
            }
            return true;
        case INT:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                int v = inputByteBuffer.read(invert) ^ 0x80;
                for (int i = 0; i < 3; i++) {
                    v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                currentInt = v;
            }
            return true;
        case LONG:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                long v = inputByteBuffer.read(invert) ^ 0x80;
                for (int i = 0; i < 7; i++) {
                    v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                currentLong = v;
            }
            return true;
        case DATE:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                int v = inputByteBuffer.read(invert) ^ 0x80;
                for (int i = 0; i < 3; i++) {
                    v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                currentDateWritable.set(v);
            }
            return true;
        case TIMESTAMP:
            {
                if (tempTimestampBytes == null) {
                    tempTimestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
                }
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                for (int i = 0; i < tempTimestampBytes.length; i++) {
                    tempTimestampBytes[i] = inputByteBuffer.read(invert);
                }
                currentTimestampWritable.setBinarySortable(tempTimestampBytes, 0);
            }
            return true;
        case FLOAT:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                int v = 0;
                for (int i = 0; i < 4; i++) {
                    v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                if ((v & (1 << 31)) == 0) {
                    // negative number, flip all bits
                    v = ~v;
                } else {
                    // positive number, flip the first bit
                    v = v ^ (1 << 31);
                }
                currentFloat = Float.intBitsToFloat(v);
            }
            return true;
        case DOUBLE:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                long v = 0;
                for (int i = 0; i < 8; i++) {
                    v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                if ((v & (1L << 63)) == 0) {
                    // negative number, flip all bits
                    v = ~v;
                } else {
                    // positive number, flip the first bit
                    v = v ^ (1L << 63);
                }
                currentDouble = Double.longBitsToDouble(v);
            }
            return true;
        case BINARY:
        case STRING:
        case CHAR:
        case VARCHAR:
            {
                /*
         * This code is a modified version of BinarySortableSerDe.deserializeText that lets us
         * detect if we can return a reference to the bytes directly.
         */
                // Get the actual length first
                bytesStart = inputByteBuffer.tell();
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                int length = 0;
                do {
                    byte b = inputByteBuffer.read(invert);
                    if (b == 0) {
                        // end of string
                        break;
                    }
                    if (b == 1) {
                        // the last char is an escape char. read the actual char
                        inputByteBuffer.read(invert);
                    }
                    length++;
                } while (true);
                if (length == 0 || (!invert && length == inputByteBuffer.tell() - bytesStart - 1)) {
                    // No inversion or escaping happened, so we can reference the bytes directly.
                    currentExternalBufferNeeded = false;
                    currentBytes = inputByteBuffer.getData();
                    currentBytesStart = bytesStart;
                    currentBytesLength = length;
                } else {
                    // We are now positioned at the end of this field's bytes.
                    if (useExternalBuffer) {
                        // If we decided not to reposition and re-read the buffer to copy it with
                        // copyToExternalBuffer, we will still be correctly positioned for the next field.
                        currentExternalBufferNeeded = true;
                        currentExternalBufferNeededLen = length;
                    } else {
                        // The copyToBuffer will reposition and re-read the input buffer.
                        currentExternalBufferNeeded = false;
                        if (internalBufferLen < length) {
                            internalBufferLen = length;
                            internalBuffer = new byte[internalBufferLen];
                        }
                        copyToBuffer(internalBuffer, 0, length);
                        currentBytes = internalBuffer;
                        currentBytesStart = 0;
                        currentBytesLength = length;
                    }
                }
            }
            return true;
        case INTERVAL_YEAR_MONTH:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                int v = inputByteBuffer.read(invert) ^ 0x80;
                for (int i = 0; i < 3; i++) {
                    v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                currentHiveIntervalYearMonthWritable.set(v);
            }
            return true;
        case INTERVAL_DAY_TIME:
            {
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                long totalSecs = inputByteBuffer.read(invert) ^ 0x80;
                for (int i = 0; i < 7; i++) {
                    totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                int nanos = inputByteBuffer.read(invert) ^ 0x80;
                for (int i = 0; i < 3; i++) {
                    nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                currentHiveIntervalDayTimeWritable.set(totalSecs, nanos);
            }
            return true;
        case DECIMAL:
            {
                // Since enforcing precision and scale can cause a HiveDecimal to become NULL,
                // we must read it, enforce it here, and either return NULL or buffer the result.
                final boolean invert = columnSortOrderIsDesc[fieldIndex];
                int b = inputByteBuffer.read(invert) - 1;
                if (!(b == 1 || b == -1 || b == 0)) {
                    throw new IOException("Unexpected byte value " + (int) b + " in binary sortable format data (invert " + invert + ")");
                }
                final boolean positive = b != -1;
                int factor = inputByteBuffer.read(invert) ^ 0x80;
                for (int i = 0; i < 3; i++) {
                    factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff);
                }
                if (!positive) {
                    factor = -factor;
                }
                final int decimalStart = inputByteBuffer.tell();
                int length = 0;
                do {
                    b = inputByteBuffer.read(positive ? invert : !invert);
                    if (b == 1) {
                        throw new IOException("Expected -1 and found byte value " + (int) b + " in binary sortable format data (invert " + invert + ")");
                    }
                    if (b == 0) {
                        // end of digits
                        break;
                    }
                    length++;
                } while (true);
                // CONSIDER: Allocate a larger initial size.
                if (tempDecimalBuffer == null || tempDecimalBuffer.length < length) {
                    tempDecimalBuffer = new byte[length];
                }
                inputByteBuffer.seek(decimalStart);
                for (int i = 0; i < length; ++i) {
                    tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
                }
                // read the null byte again
                inputByteBuffer.read(positive ? invert : !invert);
                // Set the value of the writable from the decimal digits that were written with no dot.
                final int scale = length - factor;
                currentHiveDecimalWritable.setFromDigitsOnlyBytesWithScale(!positive, tempDecimalBuffer, 0, length, scale);
                boolean decimalIsNull = !currentHiveDecimalWritable.isSet();
                if (!decimalIsNull) {
                    // We have a decimal.  After we enforce precision and scale, will it become a NULL?
                    final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) field.typeInfo;
                    final int enforcePrecision = decimalTypeInfo.getPrecision();
                    final int enforceScale = decimalTypeInfo.getScale();
                    decimalIsNull = !currentHiveDecimalWritable.mutateEnforcePrecisionScale(enforcePrecision, enforceScale);
                }
                if (decimalIsNull) {
                    return false;
                }
            }
            return true;
        default:
            throw new RuntimeException("Unexpected primitive type category " + field.primitiveCategory);
    }
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) IOException(java.io.IOException)
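
To see why the XOR-with-0x80 and byte-shifting above recover the original value, here is a small self-contained sketch (names and helper methods are mine, not Hive code) of the sign-bit-flipped big-endian encoding that readPrimitive decodes for INT:

import java.nio.ByteBuffer;
import java.util.Arrays;

public class BinarySortableIntSketch {
    // Flip the sign bit so that unsigned, lexicographic byte comparison
    // matches numeric order (negatives sort before positives).
    static byte[] encode(int value) {
        return ByteBuffer.allocate(4).putInt(value ^ 0x80000000).array();
    }

    // Mirror of the INT case above: XOR the first byte with 0x80, then
    // shift in the remaining three bytes.
    static int decode(byte[] bytes) {
        int v = (bytes[0] & 0xff) ^ 0x80;
        for (int i = 1; i < 4; i++) {
            v = (v << 8) + (bytes[i] & 0xff);
        }
        return v;
    }

    public static void main(String[] args) {
        for (int value : new int[] { -5, -1, 0, 1, 42 }) {
            if (decode(encode(value)) != value) {
                throw new AssertionError("round trip failed for " + value);
            }
        }
        // Unsigned byte order matches numeric order: -1 sorts before 0.
        System.out.println(Arrays.compareUnsigned(encode(-1), encode(0)) < 0); // true
    }
}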

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 44
Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) 33
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) 31
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) 27
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) 26
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 26
ArrayList (java.util.ArrayList) 22
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 20
BytesWritable (org.apache.hadoop.io.BytesWritable) 19
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 18
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 18
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) 18
Text (org.apache.hadoop.io.Text) 18
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable) 17
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 17
HiveChar (org.apache.hadoop.hive.common.type.HiveChar) 16
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal) 16
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar) 16
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) 16
IntWritable (org.apache.hadoop.io.IntWritable) 16