use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in project flink by apache.
the class GraphCreatingVisitor method preVisit.
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
// check if we have been here before
if (this.con2node.containsKey(c)) {
return false;
}
final OptimizerNode n;
// create a node for the operator (or sink or source) if we have not been here before
if (c instanceof GenericDataSinkBase) {
DataSinkNode dsn = new DataSinkNode((GenericDataSinkBase<?>) c);
this.sinks.add(dsn);
n = dsn;
} else if (c instanceof GenericDataSourceBase) {
n = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
} else if (c instanceof MapOperatorBase) {
n = new MapNode((MapOperatorBase<?, ?, ?>) c);
} else if (c instanceof MapPartitionOperatorBase) {
n = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
} else if (c instanceof FlatMapOperatorBase) {
n = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
} else if (c instanceof FilterOperatorBase) {
n = new FilterNode((FilterOperatorBase<?, ?>) c);
} else if (c instanceof ReduceOperatorBase) {
n = new ReduceNode((ReduceOperatorBase<?, ?>) c);
} else if (c instanceof GroupCombineOperatorBase) {
n = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
} else if (c instanceof GroupReduceOperatorBase) {
n = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
} else if (c instanceof InnerJoinOperatorBase) {
n = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof OuterJoinOperatorBase) {
n = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupOperatorBase) {
n = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupRawOperatorBase) {
n = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CrossOperatorBase) {
n = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof BulkIterationBase) {
n = new BulkIterationNode((BulkIterationBase<?>) c);
} else if (c instanceof DeltaIterationBase) {
n = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
} else if (c instanceof Union) {
n = new BinaryUnionNode((Union<?>) c);
} else if (c instanceof PartitionOperatorBase) {
n = new PartitionNode((PartitionOperatorBase<?>) c);
} else if (c instanceof SortPartitionOperatorBase) {
n = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
} else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final BulkIterationBase.PartialSolutionPlaceHolder<?> holder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
final BulkIterationBase<?> enclosingIteration = holder.getContainingBulkIteration();
final BulkIterationNode containingIterationNode = (BulkIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
BulkPartialSolutionNode p = new BulkPartialSolutionNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.WorksetPlaceHolder<?> holder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
WorksetNode p = new WorksetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.SolutionSetPlaceHolder<?> holder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
SolutionSetNode p = new SolutionSetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else {
throw new IllegalArgumentException("Unknown operator type: " + c);
}
this.con2node.put(c, n);
// key-less reducer (all-reduce)
if (n.getParallelism() < 1) {
// set the parallelism
int par = c.getParallelism();
if (n instanceof BinaryUnionNode) {
// Keep parallelism of union undefined for now.
// It will be determined based on the parallelism of its successor.
par = -1;
} else if (par > 0) {
if (this.forceParallelism && par != this.defaultParallelism) {
par = this.defaultParallelism;
Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
}
} else {
par = this.defaultParallelism;
}
n.setParallelism(par);
}
return true;
}
use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in project flink by apache.
the class RangePartitionRewriter method rewriteRangePartitionChannel.
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
final List<Channel> sourceNewOutputChannels = new ArrayList<>();
final PlanNode sourceNode = channel.getSource();
final PlanNode targetNode = channel.getTarget();
final int sourceParallelism = sourceNode.getParallelism();
final int targetParallelism = targetNode.getParallelism();
final Costs defaultZeroCosts = new Costs(0, 0, 0);
final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
// 1. Fixed size sample in each partitions.
final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
sipNode.setParallelism(sourceParallelism);
sipPlanNode.setParallelism(sourceParallelism);
sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sipPlanNode.setCosts(defaultZeroCosts);
sipChannel.setTarget(sipPlanNode);
this.plan.getAllNodes().add(sipPlanNode);
sourceNewOutputChannels.add(sipChannel);
// 2. Fixed size sample in a single coordinator.
final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
sicNode.setParallelism(1);
sicPlanNode.setParallelism(1);
sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sicPlanNode.setCosts(defaultZeroCosts);
sicChannel.setTarget(sicPlanNode);
sipPlanNode.addOutgoingChannel(sicChannel);
this.plan.getAllNodes().add(sicPlanNode);
// 3. Use sampled data to build range boundaries.
final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
rbNode.setParallelism(1);
rbPlanNode.setParallelism(1);
rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
rbPlanNode.setCosts(defaultZeroCosts);
rbChannel.setTarget(rbPlanNode);
sicPlanNode.addOutgoingChannel(rbChannel);
this.plan.getAllNodes().add(rbPlanNode);
// 4. Take range boundaries as broadcast input and take the tuple of partition id and record
// as output.
final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
// To avoid deadlock, set the DataExchangeMode of channel between source node and this to
// Batch.
ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
ariNode.setParallelism(sourceParallelism);
ariPlanNode.setParallelism(sourceParallelism);
ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
ariPlanNode.setCosts(defaultZeroCosts);
ariChannel.setTarget(ariPlanNode);
this.plan.getAllNodes().add(ariPlanNode);
sourceNewOutputChannels.add(ariChannel);
final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
broadcastChannel.setTarget(ariPlanNode);
List<NamedChannel> broadcastChannels = new ArrayList<>(1);
broadcastChannels.add(broadcastChannel);
ariPlanNode.setBroadcastInputs(broadcastChannels);
// 5. Remove the partition id.
final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
final FieldList keys = new FieldList(0);
partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
ariPlanNode.addOutgoingChannel(partChannel);
final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
final MapNode prRemoverNode = new MapNode(prOperatorBase);
final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
partChannel.setTarget(prPlanNode);
prRemoverNode.setParallelism(targetParallelism);
prPlanNode.setParallelism(targetParallelism);
GlobalProperties globalProperties = new GlobalProperties();
globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
prPlanNode.initProperties(globalProperties, new LocalProperties());
prPlanNode.setCosts(defaultZeroCosts);
this.plan.getAllNodes().add(prPlanNode);
// 6. Connect to target node.
channel.setSource(prPlanNode);
channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
prPlanNode.addOutgoingChannel(channel);
return sourceNewOutputChannels;
}
use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in project flink by apache.
the class JavaApiPostPass method traverseChannel.
private void traverseChannel(Channel channel) {
PlanNode source = channel.getSource();
Operator<?> javaOp = source.getProgramOperator();
// if (!(javaOp instanceof BulkIteration) && !(javaOp instanceof JavaPlanNode)) {
// throw new RuntimeException("Wrong operator type found in post pass: " + javaOp);
// }
TypeInformation<?> type = javaOp.getOperatorInfo().getOutputType();
if (javaOp instanceof GroupReduceOperatorBase && (source.getDriverStrategy() == DriverStrategy.SORTED_GROUP_COMBINE || source.getDriverStrategy() == DriverStrategy.ALL_GROUP_REDUCE_COMBINE)) {
GroupReduceOperatorBase<?, ?, ?> groupNode = (GroupReduceOperatorBase<?, ?, ?>) javaOp;
type = groupNode.getInput().getOperatorInfo().getOutputType();
} else if (javaOp instanceof PlanUnwrappingReduceGroupOperator && source.getDriverStrategy().equals(DriverStrategy.SORTED_GROUP_COMBINE)) {
PlanUnwrappingReduceGroupOperator<?, ?, ?> groupNode = (PlanUnwrappingReduceGroupOperator<?, ?, ?>) javaOp;
type = groupNode.getInput().getOperatorInfo().getOutputType();
}
// the serializer always exists
channel.setSerializer(createSerializer(type));
// parameterize the ship strategy
if (channel.getShipStrategy().requiresComparator()) {
channel.setShipStrategyComparator(createComparator(type, channel.getShipStrategyKeys(), getSortOrders(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder())));
}
// parameterize the local strategy
if (channel.getLocalStrategy().requiresComparator()) {
channel.setLocalStrategyComparator(createComparator(type, channel.getLocalStrategyKeys(), getSortOrders(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder())));
}
// descend to the channel's source
traverse(channel.getSource());
}
use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in project flink by apache.
the class AggregateTranslationTest method translateAggregate.
@Test
public void translateAggregate() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
@SuppressWarnings("unchecked") DataSet<Tuple3<Double, StringValue, Long>> initialData = env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));
initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();
// check keys
assertEquals(1, reducer.getKeyColumns(0).length);
assertEquals(0, reducer.getKeyColumns(0)[0]);
assertEquals(-1, reducer.getParallelism());
assertTrue(reducer.isCombinable());
assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Aggregations