Search in sources:

Example 1 with LogicalDistribution

Use of org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution in project flink by apache.

In the class HiveParserDMLHelper, the method createInsertOperationInfo:

public Tuple4<ObjectIdentifier, QueryOperation, Map<String, String>, Boolean> createInsertOperationInfo(
        RelNode queryRelNode,
        Table destTable,
        Map<String, String> staticPartSpec,
        List<String> destSchema,
        boolean overwrite) throws SemanticException {
    // sanity check
    Preconditions.checkArgument(
            queryRelNode instanceof Project
                    || queryRelNode instanceof Sort
                    || queryRelNode instanceof LogicalDistribution,
            "Expect top RelNode to be Project, Sort, or LogicalDistribution, actually got " + queryRelNode);
    if (!(queryRelNode instanceof Project)) {
        RelNode parent = ((SingleRel) queryRelNode).getInput();
        // SEL + SORT or SEL + DIST + LIMIT
        Preconditions.checkArgument(
                parent instanceof Project || parent instanceof LogicalDistribution,
                "Expect input to be a Project or LogicalDistribution, actually got " + parent);
        if (parent instanceof LogicalDistribution) {
            RelNode grandParent = ((LogicalDistribution) parent).getInput();
            Preconditions.checkArgument(
                    grandParent instanceof Project,
                    "Expect input of LogicalDistribution to be a Project, actually got " + grandParent);
        }
    }
    // handle dest schema, e.g. insert into dest(.,.,.) select ...
    queryRelNode = handleDestSchema((SingleRel) queryRelNode, destTable, destSchema, staticPartSpec.keySet());
    // track each target col and its expected type
    RelDataTypeFactory typeFactory = plannerContext.getTypeFactory();
    LinkedHashMap<String, RelDataType> targetColToCalcType = new LinkedHashMap<>();
    List<TypeInfo> targetHiveTypes = new ArrayList<>();
    List<FieldSchema> allCols = new ArrayList<>(destTable.getCols());
    allCols.addAll(destTable.getPartCols());
    for (FieldSchema col : allCols) {
        TypeInfo hiveType = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
        targetHiveTypes.add(hiveType);
        targetColToCalcType.put(col.getName(), HiveParserTypeConverter.convert(hiveType, typeFactory));
    }
    // add static partitions to query source
    if (!staticPartSpec.isEmpty()) {
        if (queryRelNode instanceof Project) {
            queryRelNode = replaceProjectForStaticPart(
                    (Project) queryRelNode, staticPartSpec, destTable, targetColToCalcType);
        } else if (queryRelNode instanceof Sort) {
            Sort sort = (Sort) queryRelNode;
            RelNode oldInput = sort.getInput();
            RelNode newInput;
            if (oldInput instanceof LogicalDistribution) {
                newInput = replaceDistForStaticParts(
                        (LogicalDistribution) oldInput, destTable, staticPartSpec, targetColToCalcType);
            } else {
                newInput = replaceProjectForStaticPart(
                        (Project) oldInput, staticPartSpec, destTable, targetColToCalcType);
                // we may need to shift the field collations
                final int numDynmPart = destTable.getTTable().getPartitionKeys().size() - staticPartSpec.size();
                if (!sort.getCollation().getFieldCollations().isEmpty() && numDynmPart > 0) {
                    sort.replaceInput(0, null);
                    sort = LogicalSort.create(
                            newInput,
                            shiftRelCollation(
                                    sort.getCollation(), (Project) oldInput, staticPartSpec.size(), numDynmPart),
                            sort.offset,
                            sort.fetch);
                }
            }
            sort.replaceInput(0, newInput);
            queryRelNode = sort;
        } else {
            queryRelNode = replaceDistForStaticParts(
                    (LogicalDistribution) queryRelNode, destTable, staticPartSpec, targetColToCalcType);
        }
    }
    // add type conversions
    queryRelNode = addTypeConversions(
            plannerContext.getCluster().getRexBuilder(),
            queryRelNode,
            new ArrayList<>(targetColToCalcType.values()),
            targetHiveTypes,
            funcConverter);
    // create identifier
    List<String> targetTablePath = Arrays.asList(destTable.getDbName(), destTable.getTableName());
    UnresolvedIdentifier unresolvedIdentifier = UnresolvedIdentifier.of(targetTablePath);
    ObjectIdentifier identifier = catalogManager.qualifyIdentifier(unresolvedIdentifier);
    return Tuple4.of(identifier, new PlannerQueryOperation(queryRelNode), staticPartSpec, overwrite);
}
Also used:
PlannerQueryOperation (org.apache.flink.table.planner.operations.PlannerQueryOperation)
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)
ArrayList (java.util.ArrayList)
UnresolvedIdentifier (org.apache.flink.table.catalog.UnresolvedIdentifier)
RelDataType (org.apache.calcite.rel.type.RelDataType)
SingleRel (org.apache.calcite.rel.SingleRel)
LogicalDistribution (org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
LinkedHashMap (java.util.LinkedHashMap)
Project (org.apache.calcite.rel.core.Project)
LogicalProject (org.apache.calcite.rel.logical.LogicalProject)
RelNode (org.apache.calcite.rel.RelNode)
RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory)
LogicalSort (org.apache.calcite.rel.logical.LogicalSort)
Sort (org.apache.calcite.rel.core.Sort)
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier)
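The static-partition branch above delegates to replaceProjectForStaticPart and replaceDistForStaticParts, which are not part of this excerpt. Below is a rough, self-contained sketch of the underlying idea; plain strings stand in for Calcite RexNode expressions, the class and all names are illustrative, and the literals are simply appended rather than placed at the exact partition-column positions the real method computes.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class StaticPartSketch {
    public static void main(String[] args) {
        // expressions projected by the SELECT clause, e.g. input refs $0 and $1
        List<String> projects = new ArrayList<>(Arrays.asList("$0", "$1"));
        // static partition spec, e.g. INSERT INTO dest PARTITION (dt='2024-01-01', region='us') SELECT ...
        Map<String, String> staticPartSpec = new LinkedHashMap<>();
        staticPartSpec.put("dt", "2024-01-01");
        staticPartSpec.put("region", "us");
        // one literal per static partition column is added to the projection, so the
        // query output lines up with the target's regular cols plus partition cols
        staticPartSpec.forEach((col, val) -> projects.add("LITERAL('" + val + "')"));
        System.out.println(projects);
        // [$0, $1, LITERAL('2024-01-01'), LITERAL('us')]
    }
}

This also suggests why the Sort branch touches the collations: once constants are spliced into the projection, a field collation that pointed at a dynamic partition column refers to a shifted index, which appears to be what shiftRelCollation compensates for.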

Example 2 with LogicalDistribution

Use of org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution in project flink by apache.

In the class HiveParserDMLHelper, the method handleDestSchema:

private RelNode handleDestSchema(
        SingleRel queryRelNode, Table destTable, List<String> destSchema, Set<String> staticParts)
        throws SemanticException {
    if (destSchema == null || destSchema.isEmpty()) {
        return queryRelNode;
    }
    // natural schema should contain regular cols + dynamic cols
    List<FieldSchema> naturalSchema = new ArrayList<>(destTable.getCols());
    if (destTable.isPartitioned()) {
        naturalSchema.addAll(
                destTable.getTTable().getPartitionKeys().stream()
                        .filter(f -> !staticParts.contains(f.getName()))
                        .collect(Collectors.toList()));
    }
    // we don't need to do anything if the dest schema is the same as natural schema
    if (destSchema.equals(HiveCatalog.getFieldNames(naturalSchema))) {
        return queryRelNode;
    }
    // build a list to create a Project on top of original Project
    // for each col in dest table, if it's in dest schema, store its corresponding index in the
    // dest schema, otherwise store its type and we'll create NULL for it
    List<Object> updatedIndices = new ArrayList<>(naturalSchema.size());
    for (FieldSchema col : naturalSchema) {
        int index = destSchema.indexOf(col.getName());
        if (index < 0) {
            updatedIndices.add(
                    HiveParserTypeConverter.convert(
                            TypeInfoUtils.getTypeInfoFromTypeString(col.getType()),
                            plannerContext.getTypeFactory()));
        } else {
            updatedIndices.add(index);
        }
    }
    if (queryRelNode instanceof Project) {
        return addProjectForDestSchema((Project) queryRelNode, updatedIndices);
    } else if (queryRelNode instanceof Sort) {
        Sort sort = (Sort) queryRelNode;
        RelNode sortInput = sort.getInput();
        // DIST + LIMIT
        if (sortInput instanceof LogicalDistribution) {
            RelNode newDist = handleDestSchemaForDist((LogicalDistribution) sortInput, updatedIndices);
            sort.replaceInput(0, newDist);
            return sort;
        }
        // PROJECT + SORT
        RelNode addedProject = addProjectForDestSchema((Project) sortInput, updatedIndices);
        // we may need to update the field collations
        List<RelFieldCollation> fieldCollations = sort.getCollation().getFieldCollations();
        if (!fieldCollations.isEmpty()) {
            sort.replaceInput(0, null);
            sort = LogicalSort.create(
                    addedProject,
                    updateRelCollation(sort.getCollation(), updatedIndices),
                    sort.offset,
                    sort.fetch);
        }
        sort.replaceInput(0, addedProject);
        return sort;
    } else {
        // PROJECT + DIST
        return handleDestSchemaForDist((LogicalDistribution) queryRelNode, updatedIndices);
    }
}
Also used:
Project (org.apache.calcite.rel.core.Project)
LogicalProject (org.apache.calcite.rel.logical.LogicalProject)
RelNode (org.apache.calcite.rel.RelNode)
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)
ArrayList (java.util.ArrayList)
LogicalSort (org.apache.calcite.rel.logical.LogicalSort)
Sort (org.apache.calcite.rel.core.Sort)
List (java.util.List)
LogicalDistribution (org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution)
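The updatedIndices list built above is deliberately heterogeneous: an Integer entry means "read field i of the dest schema", while a RelDataType entry means "this column is absent from the dest schema, so project a NULL of that type". Here is a minimal, self-contained illustration of that bookkeeping; the class is hypothetical and a String marker stands in for the RelDataType.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DestSchemaSketch {
    public static void main(String[] args) {
        // natural schema of the dest table: regular cols + dynamic partition cols
        List<String> naturalSchema = Arrays.asList("a", "b", "c");
        // user-specified dest schema, e.g. INSERT INTO dest (c, a) SELECT ...
        List<String> destSchema = Arrays.asList("c", "a");
        // mirror the loop above: store the dest-schema index, or a marker meaning "project NULL"
        List<Object> updatedIndices = new ArrayList<>(naturalSchema.size());
        for (String col : naturalSchema) {
            int index = destSchema.indexOf(col);
            updatedIndices.add(index < 0 ? "NULL(" + col + ")" : index);
        }
        System.out.println(updatedIndices);
        // [1, NULL(b), 0] -> a reads query field 1, b becomes NULL, c reads query field 0
    }
}

The real method keeps the column's RelDataType instead of a string so that the generated NULL literal carries the right type when addProjectForDestSchema and handleDestSchemaForDist rebuild the projection.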

Aggregations

ArrayList (java.util.ArrayList): 2 uses
RelNode (org.apache.calcite.rel.RelNode): 2 uses
Project (org.apache.calcite.rel.core.Project): 2 uses
Sort (org.apache.calcite.rel.core.Sort): 2 uses
LogicalProject (org.apache.calcite.rel.logical.LogicalProject): 2 uses
LogicalSort (org.apache.calcite.rel.logical.LogicalSort): 2 uses
LogicalDistribution (org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution): 2 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2 uses
LinkedHashMap (java.util.LinkedHashMap): 1 use
List (java.util.List): 1 use
SingleRel (org.apache.calcite.rel.SingleRel): 1 use
RelDataType (org.apache.calcite.rel.type.RelDataType): 1 use
RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory): 1 use
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 1 use
UnresolvedIdentifier (org.apache.flink.table.catalog.UnresolvedIdentifier): 1 use
PlannerQueryOperation (org.apache.flink.table.planner.operations.PlannerQueryOperation): 1 use
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 1 use