use of org.apache.flink.api.common.operators.Ordering in project flink by apache.
the class GroupReduceNode method initPossibleProperties.
private List<OperatorDescriptorSingle> initPossibleProperties(Partitioner<?> customPartitioner) {
// see if an internal hint dictates the strategy to use
final Configuration conf = getOperator().getParameters();
final String localStrategy = conf.getString(Optimizer.HINT_LOCAL_STRATEGY, null);
final boolean useCombiner;
if (localStrategy != null) {
if (Optimizer.HINT_LOCAL_STRATEGY_SORT.equals(localStrategy)) {
useCombiner = false;
} else if (Optimizer.HINT_LOCAL_STRATEGY_COMBINING_SORT.equals(localStrategy)) {
if (!isCombineable()) {
Optimizer.LOG.warn("Strategy hint for GroupReduce '" + getOperator().getName() + "' requires combinable reduce, but user function is not marked combinable.");
}
useCombiner = true;
} else {
throw new CompilerException("Invalid local strategy hint for match contract: " + localStrategy);
}
} else {
useCombiner = isCombineable();
}
// check if we can work with a grouping (simple reducer), or if we need ordering because of a group order
Ordering groupOrder = null;
if (getOperator() instanceof GroupReduceOperatorBase) {
groupOrder = getOperator().getGroupOrder();
if (groupOrder != null && groupOrder.getNumberOfFields() == 0) {
groupOrder = null;
}
}
OperatorDescriptorSingle props = useCombiner ? (this.keys == null ? new AllGroupWithPartialPreGroupProperties() : new GroupReduceWithCombineProperties(this.keys, groupOrder, customPartitioner)) : (this.keys == null ? new AllGroupReduceProperties() : new GroupReduceProperties(this.keys, groupOrder, customPartitioner));
return Collections.singletonList(props);
}
use of org.apache.flink.api.common.operators.Ordering in project flink by apache.
the class RangePartitionRewriter method rewriteRangePartitionChannel.
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
final List<Channel> sourceNewOutputChannels = new ArrayList<>();
final PlanNode sourceNode = channel.getSource();
final PlanNode targetNode = channel.getTarget();
final int sourceParallelism = sourceNode.getParallelism();
final int targetParallelism = targetNode.getParallelism();
final Costs defaultZeroCosts = new Costs(0, 0, 0);
final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
// 1. Fixed size sample in each partitions.
final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
sipNode.setParallelism(sourceParallelism);
sipPlanNode.setParallelism(sourceParallelism);
sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sipPlanNode.setCosts(defaultZeroCosts);
sipChannel.setTarget(sipPlanNode);
this.plan.getAllNodes().add(sipPlanNode);
sourceNewOutputChannels.add(sipChannel);
// 2. Fixed size sample in a single coordinator.
final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
sicNode.setParallelism(1);
sicPlanNode.setParallelism(1);
sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sicPlanNode.setCosts(defaultZeroCosts);
sicChannel.setTarget(sicPlanNode);
sipPlanNode.addOutgoingChannel(sicChannel);
this.plan.getAllNodes().add(sicPlanNode);
// 3. Use sampled data to build range boundaries.
final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
rbNode.setParallelism(1);
rbPlanNode.setParallelism(1);
rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
rbPlanNode.setCosts(defaultZeroCosts);
rbChannel.setTarget(rbPlanNode);
sicPlanNode.addOutgoingChannel(rbChannel);
this.plan.getAllNodes().add(rbPlanNode);
// 4. Take range boundaries as broadcast input and take the tuple of partition id and record as output.
final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
// To avoid deadlock, set the DataExchangeMode of channel between source node and this to Batch.
ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
ariNode.setParallelism(sourceParallelism);
ariPlanNode.setParallelism(sourceParallelism);
ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
ariPlanNode.setCosts(defaultZeroCosts);
ariChannel.setTarget(ariPlanNode);
this.plan.getAllNodes().add(ariPlanNode);
sourceNewOutputChannels.add(ariChannel);
final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
broadcastChannel.setTarget(ariPlanNode);
List<NamedChannel> broadcastChannels = new ArrayList<>(1);
broadcastChannels.add(broadcastChannel);
ariPlanNode.setBroadcastInputs(broadcastChannels);
// 5. Remove the partition id.
final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
final FieldList keys = new FieldList(0);
partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
ariPlanNode.addOutgoingChannel(partChannel);
final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
final MapNode prRemoverNode = new MapNode(prOperatorBase);
final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
partChannel.setTarget(prPlanNode);
prRemoverNode.setParallelism(targetParallelism);
prPlanNode.setParallelism(targetParallelism);
GlobalProperties globalProperties = new GlobalProperties();
globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
prPlanNode.initProperties(globalProperties, new LocalProperties());
prPlanNode.setCosts(defaultZeroCosts);
this.plan.getAllNodes().add(prPlanNode);
// 6. Connect to target node.
channel.setSource(prPlanNode);
channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
prPlanNode.addOutgoingChannel(channel);
return sourceNewOutputChannels;
}
use of org.apache.flink.api.common.operators.Ordering in project flink by apache.
the class LocalProperties method filterBySemanticProperties.
// --------------------------------------------------------------------------------------------
/**
* Filters these LocalProperties by the fields that are forwarded to the output
* as described by the SemanticProperties.
*
* @param props The semantic properties holding information about forwarded fields.
* @param input The index of the input.
* @return The filtered LocalProperties
*/
public LocalProperties filterBySemanticProperties(SemanticProperties props, int input) {
if (props == null) {
throw new NullPointerException("SemanticProperties may not be null.");
}
LocalProperties returnProps = new LocalProperties();
// check if sorting is preserved
if (this.ordering != null) {
Ordering newOrdering = new Ordering();
for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
int sourceField = this.ordering.getInvolvedIndexes().get(i);
FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
if (targetField == null || targetField.size() == 0) {
if (i == 0) {
// order fully destroyed
newOrdering = null;
break;
} else {
// order partially preserved
break;
}
} else {
// use any field of target fields for now. We should use something like field equivalence sets in the future.
if (targetField.size() > 1) {
LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
}
newOrdering.appendOrdering(targetField.toArray()[0], this.ordering.getType(i), this.ordering.getOrder(i));
}
}
returnProps.ordering = newOrdering;
if (newOrdering != null) {
returnProps.groupedFields = newOrdering.getInvolvedIndexes();
} else {
returnProps.groupedFields = null;
}
} else // check if grouping is preserved
if (this.groupedFields != null) {
FieldList newGroupedFields = new FieldList();
for (Integer sourceField : this.groupedFields) {
FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
if (targetField == null || targetField.size() == 0) {
newGroupedFields = null;
break;
} else {
// use any field of target fields for now. We should use something like field equivalence sets in the future.
if (targetField.size() > 1) {
LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
}
newGroupedFields = newGroupedFields.addField(targetField.toArray()[0]);
}
}
returnProps.groupedFields = newGroupedFields;
}
if (this.uniqueFields != null) {
Set<FieldSet> newUniqueFields = new HashSet<FieldSet>();
for (FieldSet fields : this.uniqueFields) {
FieldSet newFields = new FieldSet();
for (Integer sourceField : fields) {
FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
if (targetField == null || targetField.size() == 0) {
newFields = null;
break;
} else {
// use any field of target fields for now. We should use something like field equivalence sets in the future.
if (targetField.size() > 1) {
LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
}
newFields = newFields.addField(targetField.toArray()[0]);
}
}
if (newFields != null) {
newUniqueFields.add(newFields);
}
}
if (!newUniqueFields.isEmpty()) {
returnProps.uniqueFields = newUniqueFields;
} else {
returnProps.uniqueFields = null;
}
}
return returnProps;
}
use of org.apache.flink.api.common.operators.Ordering in project flink by apache.
the class RequestedGlobalProperties method filterBySemanticProperties.
/**
* Filters these properties by what can be preserved by the given SemanticProperties when propagated down
* to the given input.
*
* @param props The SemanticProperties which define which fields are preserved.
* @param input The index of the operator's input.
* @return The filtered RequestedGlobalProperties
*/
public RequestedGlobalProperties filterBySemanticProperties(SemanticProperties props, int input) {
// no semantic properties available. All global properties are filtered.
if (props == null) {
throw new NullPointerException("SemanticProperties may not be null.");
}
RequestedGlobalProperties rgProp = new RequestedGlobalProperties();
switch(this.partitioning) {
case FULL_REPLICATION:
case FORCED_REBALANCED:
case CUSTOM_PARTITIONING:
case RANDOM_PARTITIONED:
case ANY_DISTRIBUTION:
// make sure that certain properties are not pushed down
return null;
case HASH_PARTITIONED:
case ANY_PARTITIONING:
FieldSet newFields;
if (this.partitioningFields instanceof FieldList) {
newFields = new FieldList();
} else {
newFields = new FieldSet();
}
for (Integer targetField : this.partitioningFields) {
int sourceField = props.getForwardingSourceField(input, targetField);
if (sourceField >= 0) {
newFields = newFields.addField(sourceField);
} else {
// partial partitionings are not preserved to avoid skewed partitioning
return null;
}
}
rgProp.partitioning = this.partitioning;
rgProp.partitioningFields = newFields;
return rgProp;
case RANGE_PARTITIONED:
// range partitioning
Ordering newOrdering = new Ordering();
for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
int value = this.ordering.getInvolvedIndexes().get(i);
int sourceField = props.getForwardingSourceField(input, value);
if (sourceField >= 0) {
newOrdering.appendOrdering(sourceField, this.ordering.getType(i), this.ordering.getOrder(i));
} else {
return null;
}
}
rgProp.partitioning = this.partitioning;
rgProp.ordering = newOrdering;
rgProp.dataDistribution = this.dataDistribution;
return rgProp;
default:
throw new RuntimeException("Unknown partitioning type encountered.");
}
}
use of org.apache.flink.api.common.operators.Ordering in project flink by apache.
the class RequestedLocalProperties method filterBySemanticProperties.
// --------------------------------------------------------------------------------------------
/**
* Filters these properties by what can be preserved by the given SemanticProperties when propagated down
* to the given input.
*
* @param props The SemanticProperties which define which fields are preserved.
* @param input The index of the operator's input.
* @return The filtered RequestedLocalProperties
*/
public RequestedLocalProperties filterBySemanticProperties(SemanticProperties props, int input) {
// no semantic properties, all local properties are filtered
if (props == null) {
throw new NullPointerException("SemanticProperties may not be null.");
}
if (this.ordering != null) {
Ordering newOrdering = new Ordering();
for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
int targetField = this.ordering.getInvolvedIndexes().get(i);
int sourceField = props.getForwardingSourceField(input, targetField);
if (sourceField >= 0) {
newOrdering.appendOrdering(sourceField, this.ordering.getType(i), this.ordering.getOrder(i));
} else {
return null;
}
}
return new RequestedLocalProperties(newOrdering);
} else if (this.groupedFields != null) {
FieldSet newGrouping = new FieldSet();
// check, whether the local key grouping is preserved
for (Integer targetField : this.groupedFields) {
int sourceField = props.getForwardingSourceField(input, targetField);
if (sourceField >= 0) {
newGrouping = newGrouping.addField(sourceField);
} else {
return null;
}
}
return new RequestedLocalProperties(newGrouping);
} else {
return null;
}
}
Aggregations