Search in sources :

Example 21 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project flink by apache.

the class HiveParserCalcitePlanner method genSetOpLogicalPlan.

/**
 * Builds the Calcite set-operation node for two already planned inputs.
 *
 * <p>The output row resolver takes its column names from the left input; every column is
 * retyped to the common type that {@code FunctionRegistry.getCommonClassForUnionAll} reports for
 * the left/right column pair. An input is wrapped in a {@link LogicalProject} when at least one
 * of its columns has a type different from that common type. The resulting node and its row
 * resolver are registered in {@code relToRowResolver} and {@code relToHiveColNameCalcitePosMap}.
 *
 * @param opcode which set operator to build
 * @param alias table alias under which the output columns are registered
 * @param leftalias alias used to fetch the left input's column map
 * @param leftRel left input
 * @param rightalias alias used to fetch the right input's column map
 * @param rightRel right input
 * @return the created set-operation node
 * @throws SemanticException if the two sides have a different number of columns, a column pair
 *     has no common type, or the opcode is not handled
 */
@SuppressWarnings("nls")
private RelNode genSetOpLogicalPlan(HiveParserQBExpr.Opcode opcode, String alias, String leftalias, RelNode leftRel, String rightalias, RelNode rightRel) throws SemanticException {
    // 1. Fetch the row resolvers and the alias -> column maps of both inputs
    HiveParserRowResolver leftRR = relToRowResolver.get(leftRel);
    HiveParserRowResolver rightRR = relToRowResolver.get(rightRel);
    HashMap<String, ColumnInfo> leftMap = leftRR.getFieldMap(leftalias);
    HashMap<String, ColumnInfo> rightMap = rightRR.getFieldMap(rightalias);

    // 2. Both sides must expose the same number of columns
    if (leftMap.size() != rightMap.size()) {
        throw new SemanticException("Schema of both sides of union should match.");
    }

    // 3. Derive the output row resolver: left-side names, pairwise common types
    HiveParserRowResolver setOpOutRR = new HiveParserRowResolver();
    Iterator<Map.Entry<String, ColumnInfo>> rightEntries = rightMap.entrySet().iterator();
    for (Map.Entry<String, ColumnInfo> leftEntry : leftMap.entrySet()) {
        // the size check above guarantees the right iterator has a matching entry
        ColumnInfo rightCol = rightEntries.next().getValue();
        ColumnInfo leftCol = leftEntry.getValue();
        String field = leftEntry.getKey();
        // try a widening conversion; without a common type the set operation is rejected
        TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(leftCol.getType(), rightCol.getType());
        if (commonTypeInfo == null) {
            HiveParserASTNode tabRef = null;
            if (!getQB().getAliases().isEmpty()) {
                tabRef = getQB().getParseInfo().getSrcForAlias(getQB().getAliases().get(0));
            }
            throw new SemanticException(generateErrorMessage(tabRef, "Schema of both sides of setop should match: Column " + field + " is of type " + leftCol.getType().getTypeName() + " on first table and type " + rightCol.getType().getTypeName() + " on second table"));
        }
        ColumnInfo outCol = new ColumnInfo(leftCol);
        outCol.setType(commonTypeInfo);
        setOpOutRR.put(alias, field, outCol);
    }

    // 4. Work out which side needs casts (Calcite requires exact types on both sides of a
    // SetOp) and prepare one projection expression per column for each side
    boolean leftNeedsTypeCast = false;
    boolean rightNeedsTypeCast = false;
    List<RexNode> leftProjs = new ArrayList<>();
    List<RexNode> rightProjs = new ArrayList<>();
    List<RelDataTypeField> leftFields = leftRel.getRowType().getFieldList();
    List<RelDataTypeField> rightFields = rightRel.getRowType().getFieldList();
    for (int pos = 0; pos < leftFields.size(); pos++) {
        RelDataType leftType = leftFields.get(pos).getType();
        RelDataType rightType = rightFields.get(pos).getType();
        // when both sides already agree, each column simply keeps its own type
        RelDataType leftTarget = leftType;
        RelDataType rightTarget = rightType;
        if (!leftType.equals(rightType)) {
            RelDataType unionType = HiveParserUtils.toRelDataType(setOpOutRR.getColumnInfos().get(pos).getType(), cluster.getTypeFactory());
            leftNeedsTypeCast |= !unionType.equals(leftType);
            rightNeedsTypeCast |= !unionType.equals(rightType);
            leftTarget = unionType;
            rightTarget = unionType;
        }
        leftProjs.add(cluster.getRexBuilder().ensureType(leftTarget, cluster.getRexBuilder().makeInputRef(leftType, pos), true));
        rightProjs.add(cluster.getRexBuilder().ensureType(rightTarget, cluster.getRexBuilder().makeInputRef(rightType, pos), true));
    }

    // 5. Put a Project on top of an input only if that input needs a cast for type parity
    if (leftNeedsTypeCast) {
        leftRel = LogicalProject.create(leftRel, Collections.emptyList(), leftProjs, leftRel.getRowType().getFieldNames());
    }
    if (rightNeedsTypeCast) {
        rightRel = LogicalProject.create(rightRel, Collections.emptyList(), rightProjs, rightRel.getRowType().getFieldNames());
    }

    // 6. Create the SetOp node; the boolean passed to create() is the "all" flag
    List<RelNode> inputs = Arrays.asList(leftRel, rightRel);
    SetOp setOpRel;
    switch (opcode) {
        case UNION:
            setOpRel = LogicalUnion.create(inputs, true);
            break;
        case INTERSECT:
            setOpRel = LogicalIntersect.create(inputs, false);
            break;
        case INTERSECTALL:
            setOpRel = LogicalIntersect.create(inputs, true);
            break;
        case EXCEPT:
            setOpRel = LogicalMinus.create(inputs, false);
            break;
        case EXCEPTALL:
            setOpRel = LogicalMinus.create(inputs, true);
            break;
        default:
            throw new SemanticException("Unsupported set operator " + opcode.toString());
    }

    // 7. Register the new node so later planning steps can resolve its columns
    relToRowResolver.put(setOpRel, setOpOutRR);
    relToHiveColNameCalcitePosMap.put(setOpRel, buildHiveToCalciteColumnMap(setOpOutRR));
    return setOpRel;
}
Also used : SetOp(org.apache.calcite.rel.core.SetOp) HiveParserASTNode(org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelDataType(org.apache.calcite.rel.type.RelDataType) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) HiveParserRowResolver(org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) HiveParserBaseSemanticAnalyzer.buildHiveToCalciteColumnMap(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.buildHiveToCalciteColumnMap) AbstractMap(java.util.AbstractMap) HiveParserBaseSemanticAnalyzer.buildHiveColNameToInputPosMap(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.buildHiveColNameToInputPosMap) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RexNode(org.apache.calcite.rex.RexNode)

Example 22 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project flink by apache.

the class HiveParserCalcitePlanner method genOBLogicalPlan.

/**
 * Plans the ORDER BY of the query block's first destination clause on top of {@code srcRel}.
 *
 * <p>Sort keys that convert to a plain input reference are sorted on directly. Any other key is
 * appended as an extra column by a project created through {@code genSelectRelNode}, and the
 * sort refers to that extra column. The sort node and its row resolver are registered in
 * {@code relToRowResolver} and {@code relToHiveColNameCalcitePosMap}.
 *
 * @param qb query block whose ORDER BY clause is planned
 * @param srcRel input of the sort
 * @param outermostOB when extra columns were added: if true, the registered output row resolver
 *     only contains the columns of {@code srcRel}; otherwise it also contains the extra columns
 * @return pair of (sort node, original input). Both elements are null when the clause has no
 *     ORDER BY; the second element is {@code srcRel} when a project was inserted and null
 *     otherwise
 * @throws SemanticException if ORDER BY without LIMIT is rejected by the strict-mode settings,
 *     a sort expression cannot be resolved, the null ordering token is unexpected, or adding
 *     columns to a row resolver reports duplicates
 */
private Pair<Sort, RelNode> genOBLogicalPlan(HiveParserQB qb, RelNode srcRel, boolean outermostOB) throws SemanticException {
    HiveParserQBParseInfo qbp = qb.getParseInfo();
    String dest = qbp.getClauseNames().iterator().next();
    HiveParserASTNode obAST = qbp.getOrderByForClause(dest);
    if (obAST == null) {
        // no ORDER BY on this clause: nothing to plan
        return new Pair<Sort, RelNode>(null, null);
    }

    // 1. OB sanity test: in strict mode an ORDER BY must come with a LIMIT
    Integer limit = qb.getParseInfo().getDestLimit(dest);
    if (limit == null) {
        String mapRedMode = semanticAnalyzer.getConf().getVar(HiveConf.ConfVars.HIVEMAPREDMODE);
        boolean banLargeQuery = Boolean.parseBoolean(semanticAnalyzer.getConf().get("hive.strict.checks.large.query", "false"));
        if ("strict".equalsIgnoreCase(mapRedMode) || banLargeQuery) {
            throw new SemanticException(generateErrorMessage(obAST, "Order by-s without limit"));
        }
    }

    // 2. Walk the OB expressions, collecting field collations and the extra (virtual)
    // columns that have to be computed below the sort
    final List<RexNode> virtualCols = new ArrayList<>();
    final List<RelFieldCollation> fieldCollations = new ArrayList<>();
    List<Pair<HiveParserASTNode, TypeInfo>> vcASTAndType = new ArrayList<>();
    HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
    HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(cluster, srcRel.getRowType(), relToHiveColNameCalcitePosMap.get(srcRel), 0, false, funcConverter);
    int numSrcFields = srcRel.getRowType().getFieldCount();
    for (Node child : obAST.getChildren()) {
        // 2.1 Convert the AST expression to an ExprNodeDesc. The sort-direction node wraps a
        // null-ordering node, which in turn wraps the actual expression.
        HiveParserASTNode obASTExpr = (HiveParserASTNode) child;
        HiveParserASTNode nullOrderASTExpr = (HiveParserASTNode) obASTExpr.getChild(0);
        HiveParserASTNode ref = (HiveParserASTNode) nullOrderASTExpr.getChild(0);
        Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc = semanticAnalyzer.genAllExprNodeDesc(ref, inputRR);
        ExprNodeDesc obExprNodeDesc = astToExprNodeDesc.get(ref);
        if (obExprNodeDesc == null) {
            throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
        }

        // 2.2 Convert the ExprNodeDesc to a RexNode
        RexNode rexNode = converter.convert(obExprNodeDesc).accept(funcConverter);

        // 2.3 Determine the index of the sort key in the sort input. A key that is not a
        // plain input reference must be present in the child, hence it is scheduled to be
        // added by a child Project Rel.
        int fieldIndex;
        if (rexNode instanceof RexInputRef) {
            fieldIndex = ((RexInputRef) rexNode).getIndex();
        } else {
            fieldIndex = numSrcFields + virtualCols.size();
            virtualCols.add(rexNode);
            vcASTAndType.add(new Pair<>(ref, obExprNodeDesc.getTypeInfo()));
        }

        // 2.4 Determine the sort direction and the null ordering
        RelFieldCollation.Direction direction = obASTExpr.getType() == HiveASTParser.TOK_TABSORTCOLNAMEASC ? RelFieldCollation.Direction.ASCENDING : RelFieldCollation.Direction.DESCENDING;
        int nullOrderToken = nullOrderASTExpr.getType();
        RelFieldCollation.NullDirection nullOrder;
        if (nullOrderToken == HiveASTParser.TOK_NULLS_FIRST) {
            nullOrder = RelFieldCollation.NullDirection.FIRST;
        } else if (nullOrderToken == HiveASTParser.TOK_NULLS_LAST) {
            nullOrder = RelFieldCollation.NullDirection.LAST;
        } else {
            throw new SemanticException("Unexpected null ordering option: " + nullOrderToken);
        }

        // 2.5 Record the collation for this key
        fieldCollations.add(new RelFieldCollation(fieldIndex, direction, nullOrder));
    }

    // 3. Add the child Project Rel if virtual columns are needed and choose the row resolver
    // the sort output is derived from
    RelNode obInputRel = srcRel;
    RelNode originalOBInput = null;
    HiveParserRowResolver outputSourceRR = inputRR;
    if (!virtualCols.isEmpty()) {
        List<RexNode> originalInputRefs = srcRel.getRowType().getFieldList().stream().map(field -> new RexInputRef(field.getIndex(), field.getType())).collect(Collectors.toList());
        HiveParserRowResolver obSyntheticProjectRR = new HiveParserRowResolver();
        if (!HiveParserRowResolver.add(obSyntheticProjectRR, inputRR)) {
            throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
        }
        // virtual columns get internal names numbered after the existing input columns
        int vcolPos = inputRR.getRowSchema().getSignature().size();
        for (Pair<HiveParserASTNode, TypeInfo> astAndType : vcASTAndType) {
            obSyntheticProjectRR.putExpression(astAndType.getKey(), new ColumnInfo(getColumnInternalName(vcolPos++), astAndType.getValue(), null, false));
        }
        obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, virtualCols), obSyntheticProjectRR, srcRel);
        if (!outermostOB) {
            // a non-outermost ORDER BY also exposes the virtual columns in its output
            outputSourceRR = obSyntheticProjectRR;
        }
        originalOBInput = srcRel;
    }
    HiveParserRowResolver outputRR = new HiveParserRowResolver();
    if (!HiveParserRowResolver.add(outputRR, outputSourceRR)) {
        throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
    }

    // 4. Construct the sort node (no offset / fetch is passed here)
    RelCollation canonizedCollation = cluster.traitSet().canonize(RelCollationImpl.of(fieldCollations));
    Sort sortRel = LogicalSort.create(obInputRel, canonizedCollation, null, null);

    // 5. Register the sort node and its row resolver
    Map<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
    relToRowResolver.put(sortRel, outputRR);
    relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
    return new Pair<>(sortRel, originalOBInput);
}
Also used : DataType(org.apache.flink.table.types.DataType) Arrays(java.util.Arrays) LogicalSort(org.apache.calcite.rel.logical.LogicalSort) FlinkPlannerImpl(org.apache.flink.table.planner.calcite.FlinkPlannerImpl) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) HiveParserBaseSemanticAnalyzer.getGroupByForClause(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getGroupByForClause) HiveParserContext(org.apache.flink.table.planner.delegation.hive.copy.HiveParserContext) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) RelCollationImpl(org.apache.calcite.rel.RelCollationImpl) BigDecimal(java.math.BigDecimal) HiveParserBaseSemanticAnalyzer.unescapeIdentifier(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.unescapeIdentifier) HiveInspectors(org.apache.flink.table.functions.hive.conversion.HiveInspectors) CorrelationId(org.apache.calcite.rel.core.CorrelationId) HiveParserASTNode(org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode) Map(java.util.Map) HiveParserASTBuilder(org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTBuilder) RelTraitSet(org.apache.calcite.plan.RelTraitSet) RexWindowBound(org.apache.calcite.rex.RexWindowBound) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HiveParserSubQueryUtils(org.apache.flink.table.planner.delegation.hive.copy.HiveParserSubQueryUtils) HiveParserBaseSemanticAnalyzer.getHiveAggInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getHiveAggInfo) HiveTypeUtil(org.apache.flink.table.catalog.hive.util.HiveTypeUtil) HiveParserBaseSemanticAnalyzer(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) Set(java.util.Set) 
HiveParserUtils.generateErrorMessage(org.apache.flink.table.planner.delegation.hive.HiveParserUtils.generateErrorMessage) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) HiveASTParseUtils(org.apache.flink.table.planner.delegation.hive.copy.HiveASTParseUtils) HiveParserBaseSemanticAnalyzer.getGroupingSetsForCube(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getGroupingSetsForCube) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) HiveParserPreCboCtx(org.apache.flink.table.planner.delegation.hive.copy.HiveParserPreCboCtx) RelCollation(org.apache.calcite.rel.RelCollation) HiveParserBaseSemanticAnalyzer.getGroupingSets(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getGroupingSets) HiveParserBaseSemanticAnalyzer.getPartitionKeys(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getPartitionKeys) HiveParserBaseSemanticAnalyzer.removeOBInSubQuery(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.removeOBInSubQuery) HiveASTParser(org.apache.flink.table.planner.delegation.hive.parse.HiveASTParser) ErrorMsg(org.apache.hadoop.hive.ql.ErrorMsg) RexCall(org.apache.calcite.rex.RexCall) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) TypeInfoUtils(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) HiveParserJoinTypeCheckCtx(org.apache.flink.table.planner.delegation.hive.copy.HiveParserJoinTypeCheckCtx) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) AggInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo) HiveParserBaseSemanticAnalyzer.getCorrelationUse(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getCorrelationUse) HiveASTParseDriver(org.apache.flink.table.planner.delegation.hive.copy.HiveASTParseDriver) 
RexFieldCollation(org.apache.calcite.rex.RexFieldCollation) ViewExpanders(org.apache.calcite.plan.ViewExpanders) LogicalValues(org.apache.calcite.rel.logical.LogicalValues) LogicalCorrelate(org.apache.calcite.rel.logical.LogicalCorrelate) HiveParserBaseSemanticAnalyzer.validateNoHavingReferenceToAlias(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.validateNoHavingReferenceToAlias) HiveParserNamedJoinInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserNamedJoinInfo) RelDataType(org.apache.calcite.rel.type.RelDataType) HiveParserUtils.rewriteGroupingFunctionAST(org.apache.flink.table.planner.delegation.hive.HiveParserUtils.rewriteGroupingFunctionAST) LogicalIntersect(org.apache.calcite.rel.logical.LogicalIntersect) HiveParserQBSubQuery(org.apache.flink.table.planner.delegation.hive.copy.HiveParserQBSubQuery) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveParserRowResolver(org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver) Aggregate(org.apache.calcite.rel.core.Aggregate) HiveParserTypeCheckCtx(org.apache.flink.table.planner.delegation.hive.copy.HiveParserTypeCheckCtx) FrameworkConfig(org.apache.calcite.tools.FrameworkConfig) Node(org.apache.hadoop.hive.ql.lib.Node) HiveParserBaseSemanticAnalyzer.getBound(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getBound) HiveParserBaseSemanticAnalyzer.getColumnInternalName(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getColumnInternalName) HiveParserSqlFunctionConverter(org.apache.flink.table.planner.delegation.hive.copy.HiveParserSqlFunctionConverter) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) JoinRelType(org.apache.calcite.rel.core.JoinRelType) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) ArrayDeque(java.util.ArrayDeque) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) 
ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) HiveParserBaseSemanticAnalyzer.obtainTableType(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.obtainTableType) HiveParserBaseSemanticAnalyzer.convert(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.convert) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelFactories(org.apache.calcite.rel.core.RelFactories) LoggerFactory(org.slf4j.LoggerFactory) LogicalTableFunctionScan(org.apache.calcite.rel.logical.LogicalTableFunctionScan) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveParserBaseSemanticAnalyzer.processPositionAlias(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.processPositionAlias) HiveParserWindowingSpec(org.apache.flink.table.planner.delegation.hive.copy.HiveParserWindowingSpec) RexUtil(org.apache.calcite.rex.RexUtil) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) HiveParserErrorMsg(org.apache.flink.table.planner.delegation.hive.parse.HiveParserErrorMsg) RexNode(org.apache.calcite.rex.RexNode) LogicalUnion(org.apache.calcite.rel.logical.LogicalUnion) RelOptCluster(org.apache.calcite.plan.RelOptCluster) LogicalDistribution(org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution) RexLiteral(org.apache.calcite.rex.RexLiteral) HiveParserQBParseInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserQBParseInfo) HiveParserTypeConverter(org.apache.flink.table.planner.delegation.hive.copy.HiveParserTypeConverter) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Preconditions(org.apache.flink.util.Preconditions) CompositeList(org.apache.calcite.util.CompositeList) Collectors(java.util.stream.Collectors) RexInputRef(org.apache.calcite.rex.RexInputRef) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) List(java.util.List) Type(java.lang.reflect.Type) 
Sort(org.apache.calcite.rel.core.Sort) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) HiveParserQueryState(org.apache.flink.table.planner.delegation.hive.copy.HiveParserQueryState) Project(org.apache.calcite.rel.core.Project) CatalogManager(org.apache.flink.table.catalog.CatalogManager) HiveParserBaseSemanticAnalyzer.getGroupingSetsForRollup(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getGroupingSetsForRollup) HiveParserBaseSemanticAnalyzer.getWindowSpecIndx(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getWindowSpecIndx) HashMap(java.util.HashMap) SetOp(org.apache.calcite.rel.core.SetOp) Deque(java.util.Deque) RelOptUtil(org.apache.calcite.plan.RelOptUtil) LogicalMinus(org.apache.calcite.rel.logical.LogicalMinus) DeduplicateCorrelateVariables(org.apache.calcite.sql2rel.DeduplicateCorrelateVariables) HiveParserBaseSemanticAnalyzer.initPhase1Ctx(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.initPhase1Ctx) PlannerContext(org.apache.flink.table.planner.delegation.PlannerContext) HashSet(java.util.HashSet) HiveParserQBExpr(org.apache.flink.table.planner.delegation.hive.copy.HiveParserQBExpr) TableType(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.TableType) Pair(org.apache.calcite.util.Pair) HiveParserBaseSemanticAnalyzer.buildHiveToCalciteColumnMap(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.buildHiveToCalciteColumnMap) SqlOperator(org.apache.calcite.sql.SqlOperator) FlinkCalciteCatalogReader(org.apache.flink.table.planner.plan.FlinkCalciteCatalogReader) RelCollations(org.apache.calcite.rel.RelCollations) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) Logger(org.slf4j.Logger) 
HiveParserSemanticAnalyzer(org.apache.flink.table.planner.delegation.hive.copy.HiveParserSemanticAnalyzer) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) Iterator(java.util.Iterator) RexBuilder(org.apache.calcite.rex.RexBuilder) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HiveParserQB(org.apache.flink.table.planner.delegation.hive.copy.HiveParserQB) RelNode(org.apache.calcite.rel.RelNode) HiveParserBaseSemanticAnalyzer.genValues(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.genValues) HiveParserUtils.verifyCanHandleAst(org.apache.flink.table.planner.delegation.hive.HiveParserUtils.verifyCanHandleAst) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) AbstractMap(java.util.AbstractMap) ObjectPair(org.apache.hadoop.hive.common.ObjectPair) HiveParserBaseSemanticAnalyzer.buildHiveColNameToInputPosMap(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.buildHiveColNameToInputPosMap) HiveParserCreateViewInfo(org.apache.flink.table.planner.delegation.hive.parse.HiveParserCreateViewInfo) SqlUserDefinedTableFunction(org.apache.calcite.sql.validate.SqlUserDefinedTableFunction) HiveParserBaseSemanticAnalyzer.topLevelConjunctCheck(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.topLevelConjunctCheck) Util(org.apache.calcite.util.Util) HiveParserBaseSemanticAnalyzer.addToGBExpr(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.addToGBExpr) Collections(java.util.Collections) 
HiveParserBaseSemanticAnalyzer.getOrderKeys(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getOrderKeys) HiveParserASTNode(org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode) Node(org.apache.hadoop.hive.ql.lib.Node) RexNode(org.apache.calcite.rex.RexNode) RelNode(org.apache.calcite.rel.RelNode) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelTraitSet(org.apache.calcite.plan.RelTraitSet) HiveParserRowResolver(org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver) LogicalSort(org.apache.calcite.rel.logical.LogicalSort) Sort(org.apache.calcite.rel.core.Sort) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Pair(org.apache.calcite.util.Pair) ObjectPair(org.apache.hadoop.hive.common.ObjectPair) HiveParserASTNode(org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) RelCollation(org.apache.calcite.rel.RelCollation) HiveParserQBParseInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserQBParseInfo) RelNode(org.apache.calcite.rel.RelNode) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Example 23 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project flink by apache.

the class HiveParserCalcitePlanner method genUDTFPlan.

/**
 * Plans a table function (UDTF) call, used either in the SELECT list or in a lateral view.
 *
 * <p>The call is turned into a {@link LogicalTableFunctionScan} that is combined with
 * {@code input}: through a join on a TRUE literal when {@code getCorrelationUse} finds no
 * correlation in the converted call, and through a {@link LogicalCorrelate} otherwise. For a
 * SELECT UDTF a project is added on top that keeps only the columns produced by the function.
 * Every created node is registered in {@code relToRowResolver} and
 * {@code relToHiveColNameCalcitePosMap}, and the output table alias is added to {@code qb}.
 *
 * @param sqlOperator operator used to infer the return type of the call
 * @param genericUDTFName function name, used to look up the Calcite function and to generate a
 *     table alias when none is given
 * @param outputTableAlias alias of the produced table; may be null only for a SELECT UDTF
 * @param colAliases output column aliases; if empty, the list is filled in place with the field
 *     names of the function's output struct
 * @param qb current query block
 * @param operands arguments of the call
 * @param opColInfos column infos of the arguments, used for the Calcite argument types
 * @param input the relational input the function is applied to
 * @param inSelect whether the UDTF appears in the SELECT list
 * @param isOuter whether an outer (LEFT) join type is used; not allowed together with
 *     {@code inSelect}
 * @return the join/correlate node for a lateral view, or the project on top of it for a SELECT
 *     UDTF
 * @throws SemanticException if a SELECT UDTF is combined with GROUP BY, DISTRIBUTE BY, SORT BY,
 *     CLUSTER BY or a lateral view, or if the number of column aliases does not match the
 *     number of output columns
 */
private RelNode genUDTFPlan(SqlOperator sqlOperator, String genericUDTFName, String outputTableAlias, List<String> colAliases, HiveParserQB qb, List<RexNode> operands, List<ColumnInfo> opColInfos, RelNode input, boolean inSelect, boolean isOuter) throws SemanticException {
    Preconditions.checkState(!(isOuter && inSelect), "OUTER is not supported for SELECT UDTF");
    HiveParserQBParseInfo qbp = qb.getParseInfo();
    if (inSelect) {
        // A SELECT UDTF allows no GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY / lateral view
        if (!qbp.getDestToGroupBy().isEmpty()) {
            throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
        }
        if (!qbp.getDestToDistributeBy().isEmpty()) {
            throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
        }
        if (!qbp.getDestToSortBy().isEmpty()) {
            throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
        }
        if (!qbp.getDestToClusterBy().isEmpty()) {
            throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
        }
        if (!qbp.getAliasToLateralViews().isEmpty()) {
            throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
        }
    }
    LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);

    // Infer the return type of the call and derive the struct inspector of its output rows
    RelDataType relDataType = HiveParserUtils.inferReturnTypeForOperands(sqlOperator, operands, cluster.getTypeFactory());
    DataType dataType = HiveParserUtils.toDataType(relDataType);
    StructObjectInspector outputOI = (StructObjectInspector) HiveInspectors.getObjectInspector(HiveTypeUtil.toHiveTypeInfo(dataType, false));

    // A missing table alias should only happen for a SELECT UDTF: generate an unused one
    if (outputTableAlias == null) {
        Preconditions.checkState(inSelect, "Table alias not specified for lateral view");
        String prefix = "select_" + genericUDTFName + "_alias_";
        int suffix = 0;
        for (; qb.getAliases().contains(prefix + suffix); suffix++) {
            // keep counting until the generated alias is not taken
        }
        outputTableAlias = prefix + suffix;
    }
    if (colAliases.isEmpty()) {
        // the user gave no column aliases: fall back to the field names of outputOI
        for (StructField field : outputOI.getAllStructFieldRefs()) {
            colAliases.add(field.getFieldName());
        }
    }

    // The number of column aliases in the AS clause must match the number of columns output
    // by the UDTF
    int numOutputCols = outputOI.getAllStructFieldRefs().size();
    int numSuppliedAliases = colAliases.size();
    if (numSuppliedAliases != numOutputCols) {
        throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numOutputCols + " aliases " + "but got " + numSuppliedAliases));
    }

    // Generate the output column infos using internal names
    ArrayList<ColumnInfo> udtfOutputCols = new ArrayList<>();
    Iterator<String> aliasIter = colAliases.iterator();
    for (StructField structField : outputOI.getAllStructFieldRefs()) {
        String colAlias = aliasIter.next();
        assert (colAlias != null);
        // Since the UDTF operator feeds into a LVJ operator that will rename all the internal
        // names, the field name from the UDTF's OI can simply serve as the internal name
        udtfOutputCols.add(new ColumnInfo(structField.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(structField.getFieldObjectInspector()), outputTableAlias, false));
    }

    // Row resolver of the table function scan: alias -> column info, in output order
    HiveParserRowResolver udtfOutRR = new HiveParserRowResolver();
    int colIdx = 0;
    for (ColumnInfo outputCol : udtfOutputCols) {
        udtfOutRR.put(outputTableAlias, colAliases.get(colIdx++), outputCol);
    }

    // Build the row type from the resolver and look up the Calcite function for the call
    RelDataType retType = HiveParserTypeConverter.getType(cluster, udtfOutRR, null);
    RexBuilder rexBuilder = cluster.getRexBuilder();
    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
    List<RelDataType> argTypes = new ArrayList<>();
    for (ColumnInfo operandInfo : opColInfos) {
        argTypes.add(HiveParserUtils.toRelDataType(operandInfo.getType(), dtFactory));
    }
    SqlOperator calciteOp = HiveParserSqlFunctionConverter.getCalciteFn(genericUDTFName, argTypes, retType, false);
    RexNode rexNode = rexBuilder.makeCall(calciteOp, operands);

    // Convert the rex call; the result is required to use a SqlUserDefinedTableFunction
    TableFunctionConverter udtfConverter = new TableFunctionConverter(cluster, input, frameworkConfig.getOperatorTable(), catalogReader.nameMatcher());
    RexCall convertedCall = (RexCall) rexNode.accept(udtfConverter);
    SqlOperator convertedOperator = convertedCall.getOperator();
    Preconditions.checkState(convertedOperator instanceof SqlUserDefinedTableFunction, "Expect operator to be " + SqlUserDefinedTableFunction.class.getSimpleName() + ", actually got " + convertedOperator.getClass().getSimpleName());

    // TODO: how to decide this?
    Type elementType = Object[].class;
    // Create the LogicalTableFunctionScan
    RelNode tableFunctionScan = LogicalTableFunctionScan.create(input.getCluster(), Collections.emptyList(), convertedCall, elementType, retType, null);
    // Remember the table alias of the UDTF so that its columns can be referenced later
    qb.addAlias(outputTableAlias);

    // Combine the input with the function scan, depending on whether the converted call
    // uses correlation variables
    JoinRelType joinType = isOuter ? JoinRelType.LEFT : JoinRelType.INNER;
    Pair<List<CorrelationId>, ImmutableBitSet> correlUse = getCorrelationUse(convertedCall);
    RelNode correlRel;
    if (correlUse != null) {
        if (correlUse.left.size() > 1) {
            // several correlation ids: rewrite the scan to use the first one only
            tableFunctionScan = DeduplicateCorrelateVariables.go(rexBuilder, correlUse.left.get(0), Util.skip(correlUse.left), tableFunctionScan);
        }
        correlRel = LogicalCorrelate.create(input, tableFunctionScan, correlUse.left.get(0), correlUse.right, joinType);
    } else {
        correlRel = plannerContext.createRelBuilder(catalogManager.getCurrentCatalog(), catalogManager.getCurrentDatabase()).push(input).push(tableFunctionScan).join(joinType, rexBuilder.makeLiteral(true)).build();
    }

    // Register the new rels and their row resolvers
    relToHiveColNameCalcitePosMap.put(tableFunctionScan, buildHiveToCalciteColumnMap(udtfOutRR));
    relToRowResolver.put(tableFunctionScan, udtfOutRR);
    HiveParserRowResolver correlRR = HiveParserRowResolver.getCombinedRR(relToRowResolver.get(input), relToRowResolver.get(tableFunctionScan));
    relToHiveColNameCalcitePosMap.put(correlRel, buildHiveToCalciteColumnMap(correlRR));
    relToRowResolver.put(correlRel, correlRR);
    if (!inSelect) {
        return correlRel;
    }

    // SELECT UDTF: project only the columns that come after the input's own columns
    List<RexNode> projects = new ArrayList<>();
    HiveParserRowResolver projectRR = new HiveParserRowResolver();
    int firstUdtfField = input.getRowType().getFieldCount();
    int totalFields = correlRel.getRowType().getFieldCount();
    for (int pos = firstUdtfField; pos < totalFields; pos++) {
        projects.add(rexBuilder.makeInputRef(correlRel, pos));
        ColumnInfo correlColInfo = correlRR.getRowSchema().getSignature().get(pos);
        String colAlias = correlColInfo.getAlias();
        // internal names of the projected columns are numbered from zero
        ColumnInfo projColInfo = new ColumnInfo(getColumnInternalName(pos - firstUdtfField), correlColInfo.getObjectInspector(), null, false);
        projectRR.put(null, colAlias, projColInfo);
    }
    RelNode projectNode = LogicalProject.create(correlRel, Collections.emptyList(), projects, tableFunctionScan.getRowType());
    relToHiveColNameCalcitePosMap.put(projectNode, buildHiveToCalciteColumnMap(projectRR));
    relToRowResolver.put(projectNode, projectRR);
    return projectNode;
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) SqlOperator(org.apache.calcite.sql.SqlOperator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelDataType(org.apache.calcite.rel.type.RelDataType) RexCall(org.apache.calcite.rex.RexCall) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SqlUserDefinedTableFunction(org.apache.calcite.sql.validate.SqlUserDefinedTableFunction) HiveParserRowResolver(org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) DataType(org.apache.flink.table.types.DataType) RelDataType(org.apache.calcite.rel.type.RelDataType) RexBuilder(org.apache.calcite.rex.RexBuilder) ArrayList(java.util.ArrayList) CompositeList(org.apache.calcite.util.CompositeList) List(java.util.List) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) DataType(org.apache.flink.table.types.DataType) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) RelDataType(org.apache.calcite.rel.type.RelDataType) JoinRelType(org.apache.calcite.rel.core.JoinRelType) HiveParserBaseSemanticAnalyzer.obtainTableType(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.obtainTableType) Type(java.lang.reflect.Type) TableType(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.TableType) HiveParserQBParseInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserQBParseInfo) RelNode(org.apache.calcite.rel.RelNode) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) RexNode(org.apache.calcite.rex.RexNode)

Example 24 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class AccumuloRowSerializer method serializeRowId.

/**
 * Serializes the Accumulo rowid into a fresh byte array, reusing the shared {@code output}
 * buffer (which is reset first).
 *
 * <p>If the Hive field is not a primitive while the mapped column type is, the value is
 * rendered as JSON and written as a string; in every other case the value is handed to
 * {@code getSerializedValue} together with the column mapping.
 *
 * @param rowId the rowid value; must not be {@code null}
 * @param rowIdField struct field supplying the ObjectInspector for {@code rowId}
 * @param rowIdMapping column mapping supplying the declared column type
 * @return a copy of the bytes written to the buffer
 * @throws IOException if {@code rowId} is {@code null}, or propagated from the write helpers
 */
protected byte[] serializeRowId(Object rowId, StructField rowIdField, ColumnMapping rowIdMapping) throws IOException {
    if (rowId == null) {
        throw new IOException("Accumulo rowId cannot be NULL");
    }

    // Start from an empty buffer; it is shared across calls.
    output.reset();

    ObjectInspector fieldInspector = rowIdField.getFieldObjectInspector();
    TypeInfo mappedType = TypeInfoUtils.getTypeInfoFromTypeString(rowIdMapping.getColumnType());

    boolean hiveFieldIsComplex = !fieldInspector.getCategory().equals(ObjectInspector.Category.PRIMITIVE);
    if (hiveFieldIsComplex && mappedType.getCategory() == ObjectInspector.Category.PRIMITIVE) {
        // Complex Hive value stored in a primitive column: flatten it to JSON and write it
        // through the string path (the original note says this uses the escaped LazyString
        // algorithm).
        String json = SerDeUtils.getJSONString(rowId, fieldInspector);
        writeString(output, json, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    } else {
        // The mapping decides whether primitives become variable-length UTF8 strings or
        // fixed-width bytes (binary format).
        getSerializedValue(fieldInspector, rowId, output, rowIdMapping);
    }
    return output.toByteArray();
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) IOException(java.io.IOException) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 25 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class AccumuloRowSerializer method writeWithLevel.

/**
 * Writes {@code value} into {@code output}, descending recursively through lists, maps and
 * structs as directed by the {@link ObjectInspector} and inserting the separators configured
 * in the {@link LazySerDeParameters}.
 *
 * <ul>
 *   <li>PRIMITIVE: written via {@code writeBinary} when the mapping's encoding is
 *       {@code ColumnEncoding.BINARY}, otherwise via {@code writeString}.</li>
 *   <li>LIST / STRUCT: elements are joined with the separator at {@code level}; each element
 *       is written at {@code level + 1}.</li>
 *   <li>MAP: entries are joined with the separator at {@code level}, key and value with the
 *       separator at {@code level + 1}; keys and values are written at {@code level + 2}.</li>
 * </ul>
 *
 * A {@code null} list, map or struct produces a debug log entry and writes nothing.
 *
 * @param oi ObjectInspector for the current object
 * @param value The current object
 * @param output A buffer output is written to
 * @param mapping The mapping for this Hive column
 * @param level The current level/offset for the SerDe separator
 * @throws IOException propagated from the nested write calls
 * @throws RuntimeException if the inspector reports a category not handled here
 */
protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Output output, ColumnMapping mapping, int level) throws IOException {
    switch(oi.getCategory()) {
        case PRIMITIVE: {
            boolean binaryEncoded = mapping.getEncoding() == ColumnEncoding.BINARY;
            PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) oi;
            if (binaryEncoded) {
                writeBinary(output, value, primitiveOI);
            } else {
                writeString(output, value, primitiveOI);
            }
            return;
        }
        case LIST: {
            char elementSeparator = (char) serDeParams.getSeparators()[level];
            ListObjectInspector listOI = (ListObjectInspector) oi;
            List<?> elements = listOI.getList(value);
            ObjectInspector elementOI = listOI.getListElementObjectInspector();
            if (elements == null) {
                log.debug("No objects found when serializing list");
                return;
            }
            boolean needsSeparator = false;
            for (Object element : elements) {
                // Separator goes between elements only, never before the first one.
                if (needsSeparator) {
                    output.write(elementSeparator);
                }
                needsSeparator = true;
                writeWithLevel(elementOI, element, output, mapping, level + 1);
            }
            return;
        }
        case MAP: {
            char entrySeparator = (char) serDeParams.getSeparators()[level];
            char keyValueSeparator = (char) serDeParams.getSeparators()[level + 1];
            MapObjectInspector mapOI = (MapObjectInspector) oi;
            ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
            ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
            Map<?, ?> entries = mapOI.getMap(value);
            if (entries == null) {
                log.debug("No object found when serializing map");
                return;
            }
            boolean needsSeparator = false;
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                if (needsSeparator) {
                    output.write(entrySeparator);
                }
                needsSeparator = true;
                // A map consumes two separator levels, so its children start two levels down.
                writeWithLevel(keyOI, entry.getKey(), output, mapping, level + 2);
                output.write(keyValueSeparator);
                writeWithLevel(valueOI, entry.getValue(), output, mapping, level + 2);
            }
            return;
        }
        case STRUCT: {
            char fieldSeparator = (char) serDeParams.getSeparators()[level];
            StructObjectInspector structOI = (StructObjectInspector) oi;
            List<? extends StructField> fieldRefs = structOI.getAllStructFieldRefs();
            List<?> fieldValues = structOI.getStructFieldsDataAsList(value);
            if (fieldValues == null) {
                log.debug("No object found when serializing struct");
                return;
            }
            for (int idx = 0; idx < fieldValues.size(); idx++) {
                if (idx != 0) {
                    output.write(fieldSeparator);
                }
                // Field refs and field values are walked in parallel by position.
                writeWithLevel(fieldRefs.get(idx).getFieldObjectInspector(), fieldValues.get(idx), output, mapping, level + 1);
            }
            return;
        }
        default:
            throw new RuntimeException("Unknown category type: " + oi.getCategory());
    }
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)77 ArrayList (java.util.ArrayList)72 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)48 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)48 Test (org.junit.Test)44 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)43 Output (org.apache.hadoop.hive.serde2.ByteStream.Output)42 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)41 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)41 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)37 Text (org.apache.hadoop.io.Text)35 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)32 IOException (java.io.IOException)29 DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2)24 BytesWritable (org.apache.hadoop.io.BytesWritable)23 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)22 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)22 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)21 TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2)21 List (java.util.List)18