use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.
the class CoGroupSortTranslationTest method testSortTuplesAndPojos.
@Test
public void testSortTuplesAndPojos() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
DataSet<TestPoJo> input2 = env.fromElements(new TestPoJo());
input1.coGroup(input2).where(1).equalTo("b").sortFirstGroup(0, Order.DESCENDING).sortSecondGroup("c", Order.ASCENDING).sortSecondGroup("a", Order.DESCENDING).with(new CoGroupFunction<Tuple2<Long, Long>, TestPoJo, Long>() {
@Override
public void coGroup(Iterable<Tuple2<Long, Long>> first, Iterable<TestPoJo> second, Collector<Long> out) {
}
}).output(new DiscardingOutputFormat<Long>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
CoGroupOperatorBase<?, ?, ?, ?> coGroup = (CoGroupOperatorBase<?, ?, ?, ?>) sink.getInput();
assertNotNull(coGroup.getGroupOrderForInputOne());
assertNotNull(coGroup.getGroupOrderForInputTwo());
assertEquals(1, coGroup.getGroupOrderForInputOne().getNumberOfFields());
assertEquals(0, coGroup.getGroupOrderForInputOne().getFieldNumber(0).intValue());
assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputOne().getOrder(0));
assertEquals(2, coGroup.getGroupOrderForInputTwo().getNumberOfFields());
assertEquals(2, coGroup.getGroupOrderForInputTwo().getFieldNumber(0).intValue());
assertEquals(0, coGroup.getGroupOrderForInputTwo().getFieldNumber(1).intValue());
assertEquals(Order.ASCENDING, coGroup.getGroupOrderForInputTwo().getOrder(0));
assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputTwo().getOrder(1));
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.
the class CoGroupSortTranslationTest method testGroupSortTuples.
@Test
public void testGroupSortTuples() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
input1.coGroup(input2).where(1).equalTo(2).sortFirstGroup(0, Order.DESCENDING).sortSecondGroup(1, Order.ASCENDING).sortSecondGroup(0, Order.DESCENDING).with(new CoGroupFunction<Tuple2<Long, Long>, Tuple3<Long, Long, Long>, Long>() {
@Override
public void coGroup(Iterable<Tuple2<Long, Long>> first, Iterable<Tuple3<Long, Long, Long>> second, Collector<Long> out) {
}
}).output(new DiscardingOutputFormat<Long>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
CoGroupOperatorBase<?, ?, ?, ?> coGroup = (CoGroupOperatorBase<?, ?, ?, ?>) sink.getInput();
assertNotNull(coGroup.getGroupOrderForInputOne());
assertNotNull(coGroup.getGroupOrderForInputTwo());
assertEquals(1, coGroup.getGroupOrderForInputOne().getNumberOfFields());
assertEquals(0, coGroup.getGroupOrderForInputOne().getFieldNumber(0).intValue());
assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputOne().getOrder(0));
assertEquals(2, coGroup.getGroupOrderForInputTwo().getNumberOfFields());
assertEquals(1, coGroup.getGroupOrderForInputTwo().getFieldNumber(0).intValue());
assertEquals(0, coGroup.getGroupOrderForInputTwo().getFieldNumber(1).intValue());
assertEquals(Order.ASCENDING, coGroup.getGroupOrderForInputTwo().getOrder(0));
assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputTwo().getOrder(1));
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.
the class CoGroupOperator method translateToDataFlow.
@Override
@Internal
protected org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> translateToDataFlow(Operator<I1> input1, Operator<I2> input2) {
String name = getName() != null ? getName() : "CoGroup at " + defaultName;
try {
keys1.areCompatible(keys2);
} catch (IncompatibleKeysException e) {
throw new InvalidProgramException("The types of the key fields do not match.", e);
}
final org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> po;
if (keys1 instanceof SelectorFunctionKeys && keys2 instanceof SelectorFunctionKeys) {
@SuppressWarnings("unchecked") SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;
@SuppressWarnings("unchecked") SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;
po = translateSelectorFunctionCoGroup(selectorKeys1, selectorKeys2, function, getResultType(), name, input1, input2);
po.setParallelism(getParallelism());
po.setCustomPartitioner(customPartitioner);
} else if (keys2 instanceof SelectorFunctionKeys) {
int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
@SuppressWarnings("unchecked") SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;
po = translateSelectorFunctionCoGroupRight(logicalKeyPositions1, selectorKeys2, function, getInput1Type(), getResultType(), name, input1, input2);
po.setParallelism(getParallelism());
po.setCustomPartitioner(customPartitioner);
} else if (keys1 instanceof SelectorFunctionKeys) {
@SuppressWarnings("unchecked") SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;
int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
po = translateSelectorFunctionCoGroupLeft(selectorKeys1, logicalKeyPositions2, function, getInput2Type(), getResultType(), name, input1, input2);
} else if (keys1 instanceof Keys.ExpressionKeys && keys2 instanceof Keys.ExpressionKeys) {
try {
keys1.areCompatible(keys2);
} catch (IncompatibleKeysException e) {
throw new InvalidProgramException("The types of the key fields do not match.", e);
}
int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
CoGroupOperatorBase<I1, I2, OUT, CoGroupFunction<I1, I2, OUT>> op = new CoGroupOperatorBase<>(function, new BinaryOperatorInformation<>(getInput1Type(), getInput2Type(), getResultType()), logicalKeyPositions1, logicalKeyPositions2, name);
op.setFirstInput(input1);
op.setSecondInput(input2);
po = op;
} else {
throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
}
// configure shared characteristics
po.setParallelism(getParallelism());
po.setCustomPartitioner(customPartitioner);
if (groupSortKeyOrderFirst.size() > 0) {
Ordering o = new Ordering();
for (Pair<Integer, Order> entry : groupSortKeyOrderFirst) {
o.appendOrdering(entry.getLeft(), null, entry.getRight());
}
po.setGroupOrderForInputOne(o);
}
if (groupSortKeyOrderSecond.size() > 0) {
Ordering o = new Ordering();
for (Pair<Integer, Order> entry : groupSortKeyOrderSecond) {
o.appendOrdering(entry.getLeft(), null, entry.getRight());
}
po.setGroupOrderForInputTwo(o);
}
return po;
}
use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.
the class GraphCreatingVisitor method preVisit.
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
// check if we have been here before
if (this.con2node.containsKey(c)) {
return false;
}
final OptimizerNode n;
// create a node for the operator (or sink or source) if we have not been here before
if (c instanceof GenericDataSinkBase) {
DataSinkNode dsn = new DataSinkNode((GenericDataSinkBase<?>) c);
this.sinks.add(dsn);
n = dsn;
} else if (c instanceof GenericDataSourceBase) {
n = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
} else if (c instanceof MapOperatorBase) {
n = new MapNode((MapOperatorBase<?, ?, ?>) c);
} else if (c instanceof MapPartitionOperatorBase) {
n = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
} else if (c instanceof FlatMapOperatorBase) {
n = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
} else if (c instanceof FilterOperatorBase) {
n = new FilterNode((FilterOperatorBase<?, ?>) c);
} else if (c instanceof ReduceOperatorBase) {
n = new ReduceNode((ReduceOperatorBase<?, ?>) c);
} else if (c instanceof GroupCombineOperatorBase) {
n = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
} else if (c instanceof GroupReduceOperatorBase) {
n = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
} else if (c instanceof InnerJoinOperatorBase) {
n = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof OuterJoinOperatorBase) {
n = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupOperatorBase) {
n = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupRawOperatorBase) {
n = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CrossOperatorBase) {
n = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof BulkIterationBase) {
n = new BulkIterationNode((BulkIterationBase<?>) c);
} else if (c instanceof DeltaIterationBase) {
n = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
} else if (c instanceof Union) {
n = new BinaryUnionNode((Union<?>) c);
} else if (c instanceof PartitionOperatorBase) {
n = new PartitionNode((PartitionOperatorBase<?>) c);
} else if (c instanceof SortPartitionOperatorBase) {
n = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
} else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final BulkIterationBase.PartialSolutionPlaceHolder<?> holder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
final BulkIterationBase<?> enclosingIteration = holder.getContainingBulkIteration();
final BulkIterationNode containingIterationNode = (BulkIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
BulkPartialSolutionNode p = new BulkPartialSolutionNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.WorksetPlaceHolder<?> holder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
WorksetNode p = new WorksetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.SolutionSetPlaceHolder<?> holder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
SolutionSetNode p = new SolutionSetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else {
throw new IllegalArgumentException("Unknown operator type: " + c);
}
this.con2node.put(c, n);
// key-less reducer (all-reduce)
if (n.getParallelism() < 1) {
// set the parallelism
int par = c.getParallelism();
if (par > 0) {
if (this.forceParallelism && par != this.defaultParallelism) {
par = this.defaultParallelism;
Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
}
} else {
par = this.defaultParallelism;
}
n.setParallelism(par);
}
return true;
}
Aggregations