use of org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution in project flink by apache.
the class HiveParserDMLHelper method createInsertOperationInfo.
/**
 * Assembles the information needed to insert the result of a query plan into a Hive table.
 *
 * <p>The steps are, in order: validate the shape of the top of the plan, let {@code
 * handleDestSchema} deal with an explicit destination column list, compute the expected type of
 * every destination column (regular columns first, then partition columns), let the
 * {@code replace*ForStaticPart(s)} helpers deal with the static partition spec, add type
 * conversions, and finally qualify the destination table identifier.
 *
 * @param queryRelNode root of the query plan; must be a Project, a Sort or a LogicalDistribution
 * @param destTable the table being written to
 * @param staticPartSpec static partition spec; it is returned as-is in the result
 * @param destSchema explicit destination column list, forwarded to {@code handleDestSchema}
 * @param overwrite flag that is returned as-is in the result
 * @return a tuple of the qualified table identifier, a {@link PlannerQueryOperation} wrapping the
 *     rewritten plan, {@code staticPartSpec} and {@code overwrite}
 * @throws SemanticException declared by this method; presumably raised by the helpers it calls
 */
public Tuple4<ObjectIdentifier, QueryOperation, Map<String, String>, Boolean> createInsertOperationInfo(
        RelNode queryRelNode,
        Table destTable,
        Map<String, String> staticPartSpec,
        List<String> destSchema,
        boolean overwrite)
        throws SemanticException {
    // --- sanity check on the shape of the plan ---
    final boolean topIsProject = queryRelNode instanceof Project;
    Preconditions.checkArgument(
            topIsProject
                    || queryRelNode instanceof Sort
                    || queryRelNode instanceof LogicalDistribution,
            "Expect top RelNode to be Project, Sort, or LogicalDistribution, actually got " + queryRelNode);
    if (!topIsProject) {
        // SEL + SORT or SEL + DIST + LIMIT
        final RelNode child = ((SingleRel) queryRelNode).getInput();
        final boolean childIsDist = child instanceof LogicalDistribution;
        Preconditions.checkArgument(
                child instanceof Project || childIsDist,
                "Expect input to be a Project or LogicalDistribution, actually got " + child);
        if (childIsDist) {
            final RelNode grandChild = ((LogicalDistribution) child).getInput();
            Preconditions.checkArgument(
                    grandChild instanceof Project,
                    "Expect input of LogicalDistribution to be a Project, actually got " + grandChild);
        }
    }

    // --- dest schema, e.g. insert into dest(.,.,.) select ... ---
    RelNode rewritten =
            handleDestSchema(
                    (SingleRel) queryRelNode, destTable, destSchema, staticPartSpec.keySet());

    // --- remember every target column together with the type we expect for it ---
    final RelDataTypeFactory relTypeFactory = plannerContext.getTypeFactory();
    final LinkedHashMap<String, RelDataType> targetColToCalcType = new LinkedHashMap<>();
    final List<TypeInfo> targetHiveTypes = new ArrayList<>();
    final List<FieldSchema> destColumns = new ArrayList<>(destTable.getCols());
    destColumns.addAll(destTable.getPartCols());
    for (FieldSchema destColumn : destColumns) {
        final TypeInfo columnHiveType =
                TypeInfoUtils.getTypeInfoFromTypeString(destColumn.getType());
        targetHiveTypes.add(columnHiveType);
        targetColToCalcType.put(
                destColumn.getName(),
                HiveParserTypeConverter.convert(columnHiveType, relTypeFactory));
    }

    // --- add static partitions to the query source ---
    if (!staticPartSpec.isEmpty()) {
        if (rewritten instanceof Project) {
            rewritten =
                    replaceProjectForStaticPart(
                            (Project) rewritten, staticPartSpec, destTable, targetColToCalcType);
        } else if (rewritten instanceof Sort) {
            Sort sortNode = (Sort) rewritten;
            final RelNode sortInput = sortNode.getInput();
            final RelNode replacedInput;
            if (sortInput instanceof LogicalDistribution) {
                replacedInput =
                        replaceDistForStaticParts(
                                (LogicalDistribution) sortInput,
                                destTable,
                                staticPartSpec,
                                targetColToCalcType);
            } else {
                final Project sortedProject = (Project) sortInput;
                replacedInput =
                        replaceProjectForStaticPart(
                                sortedProject, staticPartSpec, destTable, targetColToCalcType);
                // the field collations may have to be shifted
                final int dynamicPartCount =
                        destTable.getTTable().getPartitionKeys().size() - staticPartSpec.size();
                final boolean hasFieldCollations =
                        !sortNode.getCollation().getFieldCollations().isEmpty();
                if (hasFieldCollations && dynamicPartCount > 0) {
                    // the old sort drops its input before a new sort is built on the new input
                    sortNode.replaceInput(0, null);
                    sortNode =
                            LogicalSort.create(
                                    replacedInput,
                                    shiftRelCollation(
                                            sortNode.getCollation(),
                                            sortedProject,
                                            staticPartSpec.size(),
                                            dynamicPartCount),
                                    sortNode.offset,
                                    sortNode.fetch);
                }
            }
            sortNode.replaceInput(0, replacedInput);
            rewritten = sortNode;
        } else {
            rewritten =
                    replaceDistForStaticParts(
                            (LogicalDistribution) rewritten,
                            destTable,
                            staticPartSpec,
                            targetColToCalcType);
        }
    }

    // --- add type conversions ---
    rewritten =
            addTypeConversions(
                    plannerContext.getCluster().getRexBuilder(),
                    rewritten,
                    new ArrayList<>(targetColToCalcType.values()),
                    targetHiveTypes,
                    funcConverter);

    // --- create the identifier of the destination table ---
    final List<String> destTablePath =
            Arrays.asList(destTable.getDbName(), destTable.getTableName());
    final ObjectIdentifier destIdentifier =
            catalogManager.qualifyIdentifier(UnresolvedIdentifier.of(destTablePath));

    return Tuple4.of(
            destIdentifier, new PlannerQueryOperation(rewritten), staticPartSpec, overwrite);
}
use of org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution in project flink by apache.
the class HiveParserDMLHelper method handleDestSchema.
/**
 * Adapts the query plan to an explicit destination column list, e.g. {@code insert into
 * dest(.,.,.) select ...}.
 *
 * <p>The "natural" schema of the destination is its regular columns followed by those partition
 * keys that are not listed in {@code staticParts}. When {@code destSchema} is null, empty, or
 * equal to the names of the natural schema, the plan is returned untouched. Otherwise a list
 * with one entry per natural column is built and handed to {@code addProjectForDestSchema} /
 * {@code handleDestSchemaForDist}.
 *
 * @param queryRelNode root of the query plan (a Project, a Sort, or otherwise treated as a
 *     LogicalDistribution)
 * @param destTable the table being written to
 * @param destSchema the column names given by the user, may be null or empty
 * @param staticParts names of the partition columns that have a static value
 * @return the original plan, or the plan rewritten for the destination column list
 * @throws SemanticException declared by this method; presumably raised by the helpers it calls
 */
private RelNode handleDestSchema(
        SingleRel queryRelNode,
        Table destTable,
        List<String> destSchema,
        Set<String> staticParts)
        throws SemanticException {
    if (destSchema == null || destSchema.isEmpty()) {
        return queryRelNode;
    }

    // natural schema = regular cols + dynamic partition cols
    final List<FieldSchema> naturalSchema = new ArrayList<>(destTable.getCols());
    if (destTable.isPartitioned()) {
        for (FieldSchema partitionKey : destTable.getTTable().getPartitionKeys()) {
            if (!staticParts.contains(partitionKey.getName())) {
                naturalSchema.add(partitionKey);
            }
        }
    }

    // nothing to do when the dest schema already matches the natural schema
    if (destSchema.equals(HiveCatalog.getFieldNames(naturalSchema))) {
        return queryRelNode;
    }

    // One entry per column of the natural schema, used to put a Project on top of the original
    // Project: the column's position in the dest schema if it is listed there, otherwise the
    // column's type so that a NULL can be created for it.
    final List<Object> updatedIndices = new ArrayList<>(naturalSchema.size());
    for (FieldSchema field : naturalSchema) {
        final int positionInDestSchema = destSchema.indexOf(field.getName());
        if (positionInDestSchema >= 0) {
            updatedIndices.add(positionInDestSchema);
            continue;
        }
        final TypeInfo fieldHiveType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
        updatedIndices.add(
                HiveParserTypeConverter.convert(fieldHiveType, plannerContext.getTypeFactory()));
    }

    if (queryRelNode instanceof Project) {
        return addProjectForDestSchema((Project) queryRelNode, updatedIndices);
    }
    if (!(queryRelNode instanceof Sort)) {
        // PROJECT + DIST
        return handleDestSchemaForDist((LogicalDistribution) queryRelNode, updatedIndices);
    }

    Sort sortNode = (Sort) queryRelNode;
    final RelNode inputOfSort = sortNode.getInput();
    if (inputOfSort instanceof LogicalDistribution) {
        // DIST + LIMIT
        sortNode.replaceInput(
                0, handleDestSchemaForDist((LogicalDistribution) inputOfSort, updatedIndices));
        return sortNode;
    }

    // PROJECT + SORT
    final RelNode projectForDestSchema =
            addProjectForDestSchema((Project) inputOfSort, updatedIndices);
    // the field collations may have to be updated
    if (!sortNode.getCollation().getFieldCollations().isEmpty()) {
        // the old sort drops its input before a new sort is built on the added project
        sortNode.replaceInput(0, null);
        sortNode =
                LogicalSort.create(
                        projectForDestSchema,
                        updateRelCollation(sortNode.getCollation(), updatedIndices),
                        sortNode.offset,
                        sortNode.fetch);
    }
    sortNode.replaceInput(0, projectForDestSchema);
    return sortNode;
}
Aggregations