Use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.
From the class DeltaIterationTranslationTest, method testCorrectTranslation:
@Test
public void testCorrectTranslation() {
    try {
        final String jobName = "Test JobName";
        final String iterationName = "Test Name";
        final String beforeNextWorksetMap = "Some Mapper";
        final String aggregatorName = "AggregatorName";
        final int[] iterationKeys = new int[] { 2 };
        final int numIterations = 13;
        final int defaultParallelism = 133;
        final int iterationParallelism = 77;

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // ------------ construct the test program ------------------
        {
            env.setParallelism(defaultParallelism);

            @SuppressWarnings("unchecked")
            DataSet<Tuple3<Double, Long, String>> initialSolutionSet =
                    env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));

            @SuppressWarnings("unchecked")
            DataSet<Tuple2<Double, String>> initialWorkSet =
                    env.fromElements(new Tuple2<Double, String>(1.23, "abc"));

            DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration =
                    initialSolutionSet.iterateDelta(initialWorkSet, numIterations, iterationKeys);
            iteration.name(iterationName).parallelism(iterationParallelism);
            iteration.registerAggregator(aggregatorName, new LongSumAggregator());

            // test that multiple workset consumers are supported
            DataSet<Tuple2<Double, String>> worksetSelfJoin =
                    iteration.getWorkset()
                            .map(new IdentityMapper<Tuple2<Double, String>>())
                            .join(iteration.getWorkset()).where(1).equalTo(1)
                            .projectFirst(0, 1);

            DataSet<Tuple3<Double, Long, String>> joined =
                    worksetSelfJoin.join(iteration.getSolutionSet())
                            .where(1).equalTo(2)
                            .with(new SolutionWorksetJoin());

            DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(
                    joined,
                    joined.map(new NextWorksetMapper()).name(beforeNextWorksetMap));

            result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
            result.writeAsText("/dev/null");
        }

        Plan p = env.createProgramPlan(jobName);

        // ------------- validate the plan ----------------
        assertEquals(jobName, p.getJobName());
        assertEquals(defaultParallelism, p.getDefaultParallelism());

        // validate the iteration
        GenericDataSinkBase<?> sink1, sink2;
        {
            Iterator<? extends GenericDataSinkBase<?>> sinks = p.getDataSinks().iterator();
            sink1 = sinks.next();
            sink2 = sinks.next();
        }

        DeltaIterationBase<?, ?> iteration = (DeltaIterationBase<?, ?>) sink1.getInput();

        // check that multi consumer translation works for iterations
        assertEquals(iteration, sink2.getInput());

        // check the basic iteration properties
        assertEquals(numIterations, iteration.getMaximumNumberOfIterations());
        assertArrayEquals(iterationKeys, iteration.getSolutionSetKeyFields());
        assertEquals(iterationParallelism, iteration.getParallelism());
        assertEquals(iterationName, iteration.getName());

        MapOperatorBase<?, ?, ?> nextWorksetMapper = (MapOperatorBase<?, ?, ?>) iteration.getNextWorkset();
        InnerJoinOperatorBase<?, ?, ?, ?> solutionSetJoin =
                (InnerJoinOperatorBase<?, ?, ?, ?>) iteration.getSolutionSetDelta();
        InnerJoinOperatorBase<?, ?, ?, ?> worksetSelfJoin =
                (InnerJoinOperatorBase<?, ?, ?, ?>) solutionSetJoin.getFirstInput();
        MapOperatorBase<?, ?, ?> worksetMapper = (MapOperatorBase<?, ?, ?>) worksetSelfJoin.getFirstInput();

        assertEquals(IdentityMapper.class, worksetMapper.getUserCodeWrapper().getUserCodeClass());
        assertEquals(NextWorksetMapper.class, nextWorksetMapper.getUserCodeWrapper().getUserCodeClass());

        if (solutionSetJoin.getUserCodeWrapper().getUserCodeObject() instanceof WrappingFunction) {
            WrappingFunction<?> wf = (WrappingFunction<?>) solutionSetJoin.getUserCodeWrapper().getUserCodeObject();
            assertEquals(SolutionWorksetJoin.class, wf.getWrappedFunction().getClass());
        } else {
            assertEquals(SolutionWorksetJoin.class, solutionSetJoin.getUserCodeWrapper().getUserCodeClass());
        }

        assertEquals(beforeNextWorksetMap, nextWorksetMapper.getName());
        assertEquals(aggregatorName,
                iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
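The navigation pattern in the assertions above generalizes: a translated Plan exposes its GenericDataSinkBase instances, and each sink's single input can be followed back into the operator DAG. A minimal sketch of that traversal (the job name and the print statement are illustrative, not part of the test):

Plan plan = env.createProgramPlan("sketch job");
for (GenericDataSinkBase<?> sink : plan.getDataSinks()) {
    // every sink wraps exactly one input operator; follow it into the DAG
    Operator<?> producer = sink.getInput();
    System.out.println(sink.getName() + " consumes " + producer.getName());
}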
Use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.
From the class OperatorTranslation, method translateToPlan:
public Plan translateToPlan(List<DataSink<?>> sinks, String jobName) {
    List<GenericDataSinkBase<?>> planSinks = new ArrayList<>();
    for (DataSink<?> sink : sinks) {
        planSinks.add(translate(sink));
    }
    Plan p = new Plan(planSinks);
    p.setJobName(jobName);
    return p;
}
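This method is the bridge between the DataSet API and the common operator layer: ExecutionEnvironment.createProgramPlan(...) gathers the sinks the program has registered and hands them to this translator. A minimal sketch of that call pattern (registeredSinks is a hypothetical List<DataSink<?>>; the usage mirrors how the environment drives the translation):

// Translate API-level sinks into a Plan rooted at GenericDataSinkBase operators.
OperatorTranslation translator = new OperatorTranslation();
Plan plan = translator.translateToPlan(registeredSinks, "example job");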
Use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.
From the class UnionTranslationTest, method translateUnion2Group:
@Test
public void translateUnion2Group() {
    try {
        final int parallelism = 4;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 3);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 2);

        dataset1.union(dataset2)
                .groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "")
                .reduceGroup((GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>) (values, out) -> {
                })
                .returns(String.class)
                .output(new DiscardingOutputFormat<>());

        Plan p = env.createProgramPlan();

        // The plan should look like the following one.
        //
        // DataSet1(3) - MapOperator(3)-+
        //                              |- Union(-1) - SingleInputOperator - Sink
        // DataSet2(2) - MapOperator(2)-+
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        Union unionOperator = (Union) ((SingleInputOperator) sink.getInput()).getInput();

        // A key mapper should be added to each of the union's two input streams.
        assertTrue(unionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>);
        assertTrue(unionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);

        // The parallelism of each key mapper should equal that of its input.
        assertEquals(3, unionOperator.getFirstInput().getParallelism());
        assertEquals(2, unionOperator.getSecondInput().getParallelism());

        // The union itself should always have the default parallelism.
        assertEquals(ExecutionConfig.PARALLELISM_DEFAULT, unionOperator.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
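For reference, ExecutionConfig.PARALLELISM_DEFAULT is the sentinel value -1, which matches the Union(-1) node in the plan diagram above: the union's parallelism stays unresolved until the optimizer derives it from the union's successor. The last assertion could therefore also be written literally (shown only to make the sentinel explicit):

// -1 marks "not yet decided"; the optimizer fills in the union's parallelism later.
assertEquals(-1, unionOperator.getParallelism());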
Use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.
From the class DataSink, method translateToDataFlow:
// --------------------------------------------------------------------------------------------

protected GenericDataSinkBase<T> translateToDataFlow(Operator<T> input) {
    // select the name (or create a default one)
    String name = this.name != null ? this.name : this.format.toString();

    GenericDataSinkBase<T> sink = new GenericDataSinkBase<>(
            this.format, new UnaryOperatorInformation<>(this.type, new NothingTypeInfo()), name);

    // set input
    sink.setInput(input);

    // set parameters
    if (this.parameters != null) {
        sink.getParameters().addAll(this.parameters);
    }

    // set parallelism
    if (this.parallelism > 0) {
        // use the specified parallelism
        sink.setParallelism(this.parallelism);
    } else {
        // if no parallelism has been specified, use the parallelism of the input
        // operator to enable chaining
        sink.setParallelism(input.getParallelism());
    }

    if (this.sortKeyPositions != null) {
        // configure output sorting
        Ordering ordering = new Ordering();
        for (int i = 0; i < this.sortKeyPositions.length; i++) {
            ordering.appendOrdering(this.sortKeyPositions[i], null, this.sortOrders[i]);
        }
        sink.setLocalOrder(ordering);
    }

    return sink;
}
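A GenericDataSinkBase can also be assembled directly, which is essentially what this method automates. A minimal sketch, assuming a String-typed input operator named source; the output format, path, and sink name are illustrative:

GenericDataSinkBase<String> sink = new GenericDataSinkBase<>(
        new TextOutputFormat<String>(new Path("/tmp/out")),
        // a sink produces no records, hence the Nothing output type
        new UnaryOperatorInformation<>(BasicTypeInfo.STRING_TYPE_INFO, new NothingTypeInfo()),
        "text sink");
sink.setInput(source);
sink.setParallelism(source.getParallelism()); // same fallback the method above applies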
Use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.
From the class GraphCreatingVisitor, method preVisit:
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
    // check if we have been here before
    if (this.con2node.containsKey(c)) {
        return false;
    }

    final OptimizerNode n;

    // create a node for the operator (or sink or source) if we have not been here before
    if (c instanceof GenericDataSinkBase) {
        DataSinkNode dsn = new DataSinkNode((GenericDataSinkBase<?>) c);
        this.sinks.add(dsn);
        n = dsn;
    } else if (c instanceof GenericDataSourceBase) {
        n = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
    } else if (c instanceof MapOperatorBase) {
        n = new MapNode((MapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof MapPartitionOperatorBase) {
        n = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FlatMapOperatorBase) {
        n = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FilterOperatorBase) {
        n = new FilterNode((FilterOperatorBase<?, ?>) c);
    } else if (c instanceof ReduceOperatorBase) {
        n = new ReduceNode((ReduceOperatorBase<?, ?>) c);
    } else if (c instanceof GroupCombineOperatorBase) {
        n = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
    } else if (c instanceof GroupReduceOperatorBase) {
        n = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
    } else if (c instanceof InnerJoinOperatorBase) {
        n = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof OuterJoinOperatorBase) {
        n = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupOperatorBase) {
        n = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupRawOperatorBase) {
        n = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CrossOperatorBase) {
        n = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof BulkIterationBase) {
        n = new BulkIterationNode((BulkIterationBase<?>) c);
    } else if (c instanceof DeltaIterationBase) {
        n = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
    } else if (c instanceof Union) {
        n = new BinaryUnionNode((Union<?>) c);
    } else if (c instanceof PartitionOperatorBase) {
        n = new PartitionNode((PartitionOperatorBase<?>) c);
    } else if (c instanceof SortPartitionOperatorBase) {
        n = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
    } else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final BulkIterationBase.PartialSolutionPlaceHolder<?> holder =
                (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
        final BulkIterationBase<?> enclosingIteration = holder.getContainingBulkIteration();
        final BulkIterationNode containingIterationNode =
                (BulkIterationNode) this.parent.con2node.get(enclosingIteration);

        // catch this for the recursive translation of step functions
        BulkPartialSolutionNode p = new BulkPartialSolutionNode(holder, containingIterationNode);
        p.setParallelism(containingIterationNode.getParallelism());
        n = p;
    } else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.WorksetPlaceHolder<?> holder =
                (DeltaIterationBase.WorksetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
        final WorksetIterationNode containingIterationNode =
                (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);

        // catch this for the recursive translation of step functions
        WorksetNode p = new WorksetNode(holder, containingIterationNode);
        p.setParallelism(containingIterationNode.getParallelism());
        n = p;
    } else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.SolutionSetPlaceHolder<?> holder =
                (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
        final WorksetIterationNode containingIterationNode =
                (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);

        // catch this for the recursive translation of step functions
        SolutionSetNode p = new SolutionSetNode(holder, containingIterationNode);
        p.setParallelism(containingIterationNode.getParallelism());
        n = p;
    } else {
        throw new IllegalArgumentException("Unknown operator type: " + c);
    }

    this.con2node.put(c, n);

    // set the parallelism only if it has not been set before; some nodes,
    // such as the solution set and feedback channels, have a fixed parallelism
    if (n.getParallelism() < 1) {
        int par = c.getParallelism();
        if (n instanceof BinaryUnionNode) {
            // keep the parallelism of the union undefined for now;
            // it will be determined based on the parallelism of its successor
            par = -1;
        } else if (par > 0) {
            if (this.forceParallelism && par != this.defaultParallelism) {
                par = this.defaultParallelism;
                Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is "
                        + "currently fixed to the parallelism of the surrounding operator (the iteration).");
            }
        } else {
            par = this.defaultParallelism;
        }
        n.setParallelism(par);
    }

    return true;
}
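The visitor is driven through Flink's Visitable contract: the optimizer calls accept(...) on the Plan, which invokes preVisit(...) on each operator pre-order (returning false prunes branches that were already visited) and postVisit(...) afterwards. A rough sketch of that driving code; the constructor arguments and the getSinks() accessor are assumptions based on how the optimizer uses this class:

GraphCreatingVisitor graphCreator = new GraphCreatingVisitor(defaultParallelism, defaultDataExchangeMode);
program.accept(graphCreator); // walks the DAG from the sinks, calling preVisit/postVisit
List<DataSinkNode> sinkNodes = graphCreator.getSinks();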