Use of org.apache.flink.api.common.operators.Operator in project flink by apache.
From the class NamesTest, method testJoinWith:
@Test
public void testJoinWith() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    List<Tuple1<String>> strLi = new ArrayList<Tuple1<String>>();
    strLi.add(new Tuple1<String>("a"));
    strLi.add(new Tuple1<String>("b"));
    DataSet<Tuple1<String>> strs = env.fromCollection(strLi);
    DataSet<Tuple1<String>> strs1 = env.fromCollection(strLi);
    strs.join(strs1).where(0).equalTo(0)
        .with(new FlatJoinFunction<Tuple1<String>, Tuple1<String>, String>() {
            @Override
            public void join(Tuple1<String> first, Tuple1<String> second, Collector<String> out) throws Exception {
                // intentionally empty; the test only inspects the generated operator name
            }
        })
        .output(new DiscardingOutputFormat<String>());
    Plan plan = env.createProgramPlan();
    plan.accept(new Visitor<Operator<?>>() {
        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof InnerJoinOperatorBase) {
                // the default operator name encodes the call site of the join
                Assert.assertEquals("Join at testJoinWith(NamesTest.java:93)", visitable.getName());
            }
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {}
    });
}
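The test relies on the default operator name, which encodes the call site. Setting an explicit name via name() replaces that default. A minimal sketch of the same pipeline with an explicit name (the name string and the empty join body are illustrative, not from the original test):

// Hedged sketch: overriding the default "Join at ..." name with an explicit one.
strs.join(strs1).where(0).equalTo(0)
    .with(new FlatJoinFunction<Tuple1<String>, Tuple1<String>, String>() {
        @Override
        public void join(Tuple1<String> first, Tuple1<String> second, Collector<String> out) {
            // no output needed for this illustration
        }
    })
    .name("MyJoin") // a plan visitor would now see "MyJoin" instead of the call-site name
    .output(new DiscardingOutputFormat<String>());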
Use of org.apache.flink.api.common.operators.Operator in project flink by apache.
From the class BranchingPlansCompilerTest, method testBranchingWithMultipleDataSinks2:
/**
 * <pre>
 *                (SRC A)
 *                   |
 *                (MAP A)
 *               /       \
 *         (MAP B)      (MAP C)
 *           /          /     \
 *      (SINK A)  (SINK B)  (SINK C)
 * </pre>
 */
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> source = env.generateSequence(1, 10000);
        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedB = mappedA.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = mappedA.map(new IdentityMapper<Long>());
        mappedB.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        Plan plan = env.createProgramPlan();
        Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());
        OptimizedPlan oPlan = compileNoStats(plan);

        // ---------- check the optimizer plan ----------

        // number of sinks
        assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());

        // remove matching sinks to check relation
        for (SinkPlanNode sink : oPlan.getDataSinks()) {
            assertTrue(sinks.remove(sink.getProgramOperator()));
        }
        assertTrue(sinks.isEmpty());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
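The IdentityMapper used above is a small test utility whose definition is not shown on this page. A minimal sketch of such a mapper, assuming the standard MapFunction contract (the actual test class in Flink may differ slightly):

// Hedged sketch of an identity map function.
public class IdentityMapper<T> implements MapFunction<T, T> {
    @Override
    public T map(T value) {
        return value; // pass every record through unchanged
    }
}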
Use of org.apache.flink.api.common.operators.Operator in project flink by apache.
From the class OperatorTranslation, method translate:
private <T> Operator<T> translate(DataSet<T> dataSet) {
    while (dataSet instanceof NoOpOperator) {
        dataSet = ((NoOpOperator<T>) dataSet).getInput();
    }

    // check if we have already translated that data set (operation or source)
    Operator<?> previous = this.translated.get(dataSet);
    if (previous != null) {
        // The optimizer will merge subsequent binary unions into one n-ary union.
        if (!(dataSet instanceof UnionOperator)) {
            // all other operators are reused
            @SuppressWarnings("unchecked")
            Operator<T> typedPrevious = (Operator<T>) previous;
            return typedPrevious;
        }
    }

    Operator<T> dataFlowOp;
    if (dataSet instanceof DataSource) {
        DataSource<T> dataSource = (DataSource<T>) dataSet;
        dataFlowOp = dataSource.translateToDataFlow();
        dataFlowOp.setResources(dataSource.getMinResources(), dataSource.getPreferredResources());
    } else if (dataSet instanceof SingleInputOperator) {
        SingleInputOperator<?, ?, ?> singleInputOperator = (SingleInputOperator<?, ?, ?>) dataSet;
        dataFlowOp = translateSingleInputOperator(singleInputOperator);
        dataFlowOp.setResources(singleInputOperator.getMinResources(), singleInputOperator.getPreferredResources());
    } else if (dataSet instanceof TwoInputOperator) {
        TwoInputOperator<?, ?, ?, ?> twoInputOperator = (TwoInputOperator<?, ?, ?, ?>) dataSet;
        dataFlowOp = translateTwoInputOperator(twoInputOperator);
        dataFlowOp.setResources(twoInputOperator.getMinResources(), twoInputOperator.getPreferredResources());
    } else if (dataSet instanceof BulkIterationResultSet) {
        BulkIterationResultSet<?> bulkIterationResultSet = (BulkIterationResultSet<?>) dataSet;
        dataFlowOp = translateBulkIteration(bulkIterationResultSet);
        dataFlowOp.setResources(
            bulkIterationResultSet.getIterationHead().getMinResources(),
            bulkIterationResultSet.getIterationHead().getPreferredResources());
    } else if (dataSet instanceof DeltaIterationResultSet) {
        DeltaIterationResultSet<?, ?> deltaIterationResultSet = (DeltaIterationResultSet<?, ?>) dataSet;
        dataFlowOp = translateDeltaIteration(deltaIterationResultSet);
        dataFlowOp.setResources(
            deltaIterationResultSet.getIterationHead().getMinResources(),
            deltaIterationResultSet.getIterationHead().getPreferredResources());
    } else if (dataSet instanceof DeltaIteration.SolutionSetPlaceHolder || dataSet instanceof DeltaIteration.WorksetPlaceHolder) {
        throw new InvalidProgramException(
            "A data set that is part of a delta iteration was used as a sink or action."
                + " Did you forget to close the iteration?");
    } else {
        throw new RuntimeException(
            "Error while creating the data flow plan for the program: Unknown operator or data set type: " + dataSet);
    }

    this.translated.put(dataSet, dataFlowOp);

    // take care of broadcast variables
    translateBcVariables(dataSet, dataFlowOp);
    return dataFlowOp;
}
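Because translated operators are cached in the translated map, a DataSet that feeds several downstream operations is translated once and the resulting Operator is reused (unions excepted, since the optimizer later merges binary unions into one n-ary union). A minimal sketch of a program that exercises this reuse, using the IdentityMapper from the earlier example (names are illustrative):

// Hedged sketch: 'mapped' feeds two sinks, but translate() produces a single
// Operator for it that both sink translations share.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Long> source = env.generateSequence(1, 100);
DataSet<Long> mapped = source.map(new IdentityMapper<Long>());
mapped.output(new DiscardingOutputFormat<Long>());
mapped.output(new DiscardingOutputFormat<Long>());
Plan plan = env.createProgramPlan(); // the second lookup of 'mapped' hits the cache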
Use of org.apache.flink.api.common.operators.Operator in project flink by apache.
From the class GraphCreatingVisitor, method postVisit:
@Override
public void postVisit(Operator<?> c) {
    OptimizerNode n = this.con2node.get(c);

    // first connect to the predecessors
    n.setInput(this.con2node, this.defaultDataExchangeMode);
    n.setBroadcastInputs(this.con2node, this.defaultDataExchangeMode);

    // if the node represents a bulk iteration, we recursively translate the data flow now
    if (n instanceof BulkIterationNode) {
        final BulkIterationNode iterNode = (BulkIterationNode) n;
        final BulkIterationBase<?> iter = iterNode.getIterationContract();

        // pass a copy of the non-iterative part into the iteration translation,
        // in case the iteration references its closure
        HashMap<Operator<?>, OptimizerNode> closure = new HashMap<Operator<?>, OptimizerNode>(con2node);

        // first, recursively build the data flow for the step function
        final GraphCreatingVisitor recursiveCreator =
            new GraphCreatingVisitor(this, true, iterNode.getParallelism(), defaultDataExchangeMode, closure);

        BulkPartialSolutionNode partialSolution;
        iter.getNextPartialSolution().accept(recursiveCreator);
        partialSolution = (BulkPartialSolutionNode) recursiveCreator.con2node.get(iter.getPartialSolution());
        OptimizerNode rootOfStepFunction = recursiveCreator.con2node.get(iter.getNextPartialSolution());
        if (partialSolution == null) {
            throw new CompilerException("Error: The step function's result does not depend on the partial solution.");
        }

        OptimizerNode terminationCriterion = null;
        if (iter.getTerminationCriterion() != null) {
            terminationCriterion = recursiveCreator.con2node.get(iter.getTerminationCriterion());
            // no intermediate node yet, traverse from the termination criterion to build the missing parts
            if (terminationCriterion == null) {
                iter.getTerminationCriterion().accept(recursiveCreator);
                terminationCriterion = recursiveCreator.con2node.get(iter.getTerminationCriterion());
            }
        }

        iterNode.setPartialSolution(partialSolution);
        iterNode.setNextPartialSolution(rootOfStepFunction, terminationCriterion);

        // go over the contained data flow and mark the dynamic path nodes
        StaticDynamicPathIdentifier identifier = new StaticDynamicPathIdentifier(iterNode.getCostWeight());
        iterNode.acceptForStepFunction(identifier);
    } else if (n instanceof WorksetIterationNode) {
        final WorksetIterationNode iterNode = (WorksetIterationNode) n;
        final DeltaIterationBase<?, ?> iter = iterNode.getIterationContract();

        // we need to ensure that both the next-workset and the solution-set-delta depend on the workset.
        // One check comes for free during the translation; we do the other check here as a precondition.
        {
            StepFunctionValidator wsf = new StepFunctionValidator();
            iter.getNextWorkset().accept(wsf);
            if (!wsf.hasFoundWorkset()) {
                throw new CompilerException(
                    "In the given program, the next workset does not depend on the workset. "
                        + "This is a prerequisite in delta iterations.");
            }
        }

        // calculate the closure of the anonymous function
        HashMap<Operator<?>, OptimizerNode> closure = new HashMap<Operator<?>, OptimizerNode>(con2node);

        // first, recursively build the data flow for the step function
        final GraphCreatingVisitor recursiveCreator =
            new GraphCreatingVisitor(this, true, iterNode.getParallelism(), defaultDataExchangeMode, closure);

        // descend from the solution set delta and check that it depends on both the workset
        // and the solution set. If it depends on both, this descent creates both nodes.
        iter.getSolutionSetDelta().accept(recursiveCreator);
        final WorksetNode worksetNode = (WorksetNode) recursiveCreator.con2node.get(iter.getWorkset());
        if (worksetNode == null) {
            throw new CompilerException(
                "In the given program, the solution set delta does not depend on the workset. "
                    + "This is a prerequisite in delta iterations.");
        }

        iter.getNextWorkset().accept(recursiveCreator);

        SolutionSetNode solutionSetNode = (SolutionSetNode) recursiveCreator.con2node.get(iter.getSolutionSet());
        if (solutionSetNode == null || solutionSetNode.getOutgoingConnections() == null || solutionSetNode.getOutgoingConnections().isEmpty()) {
            solutionSetNode = new SolutionSetNode((DeltaIterationBase.SolutionSetPlaceHolder<?>) iter.getSolutionSet(), iterNode);
        } else {
            for (DagConnection conn : solutionSetNode.getOutgoingConnections()) {
                OptimizerNode successor = conn.getTarget();
                if (successor.getClass() == JoinNode.class) {
                    // find out which input of the join the solution set is
                    JoinNode mn = (JoinNode) successor;
                    if (mn.getFirstPredecessorNode() == solutionSetNode) {
                        mn.makeJoinWithSolutionSet(0);
                    } else if (mn.getSecondPredecessorNode() == solutionSetNode) {
                        mn.makeJoinWithSolutionSet(1);
                    } else {
                        throw new CompilerException();
                    }
                } else if (successor.getClass() == CoGroupNode.class) {
                    CoGroupNode cg = (CoGroupNode) successor;
                    if (cg.getFirstPredecessorNode() == solutionSetNode) {
                        cg.makeCoGroupWithSolutionSet(0);
                    } else if (cg.getSecondPredecessorNode() == solutionSetNode) {
                        cg.makeCoGroupWithSolutionSet(1);
                    } else {
                        throw new CompilerException();
                    }
                } else {
                    throw new InvalidProgramException("Error: The only operations allowed on the solution set are Join and CoGroup.");
                }
            }
        }

        final OptimizerNode nextWorksetNode = recursiveCreator.con2node.get(iter.getNextWorkset());
        final OptimizerNode solutionSetDeltaNode = recursiveCreator.con2node.get(iter.getSolutionSetDelta());

        // set the step function nodes on the iteration node
        iterNode.setPartialSolution(solutionSetNode, worksetNode);
        iterNode.setNextPartialSolution(solutionSetDeltaNode, nextWorksetNode, defaultDataExchangeMode);

        // go over the contained data flow and mark the dynamic path nodes
        StaticDynamicPathIdentifier pathIdentifier = new StaticDynamicPathIdentifier(iterNode.getCostWeight());
        iterNode.acceptForStepFunction(pathIdentifier);
    }
}
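For reference, a minimal sketch of a DataSet program that satisfies the preconditions this visitor checks: the next workset depends on the workset, and the solution set is only used in a join. All names, types, and the join logic below are illustrative, not taken from Flink's tests:

// Hedged sketch of a well-formed delta iteration.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> initialSolution = env.generateSequence(1, 1000)
    .map(new MapFunction<Long, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(Long v) {
            return new Tuple2<Long, Long>(v, v);
        }
    });
DataSet<Tuple2<Long, Long>> initialWorkset = initialSolution;

// iterate on key field 0, for at most 10 supersteps
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
    initialSolution.iterateDelta(initialWorkset, 10, 0);

// the solution set may only be joined or co-grouped with; the delta here
// depends on both the workset and the solution set, as the visitor requires
DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
    .join(iteration.getSolutionSet()).where(0).equalTo(0)
    .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> join(Tuple2<Long, Long> workset, Tuple2<Long, Long> solution) {
            return new Tuple2<Long, Long>(workset.f0, workset.f1 + solution.f1);
        }
    });

// the next workset depends on the workset (via 'delta'),
// which satisfies the StepFunctionValidator check above
DataSet<Tuple2<Long, Long>> nextWorkset = delta;

iteration.closeWith(delta, nextWorkset)
    .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());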
Use of org.apache.flink.api.common.operators.Operator in project flink by apache.
From the class JoinTranslationTest, method createPlanAndGetJoinNode:
private DualInputPlanNode createPlanAndGetJoinNode(JoinHint hint) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> i1 = env.generateSequence(1, 1000);
    DataSet<Long> i2 = env.generateSequence(1, 1000);
    i1.join(i2, hint)
        .where(new IdentityKeySelector<Long>())
        .equalTo(new IdentityKeySelector<Long>())
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    Plan plan = env.createProgramPlan();

    // set statistics to the sources
    plan.accept(new Visitor<Operator<?>>() {
        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof GenericDataSourceBase) {
                GenericDataSourceBase<?, ?> source = (GenericDataSourceBase<?, ?>) visitable;
                setSourceStatistics(source, 10000000, 1000);
            }
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {}
    });

    OptimizedPlan op = compileWithStats(plan);
    return (DualInputPlanNode) ((SinkPlanNode) op.getDataSinks().iterator().next()).getInput().getSource();
}
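A typical caller compiles the plan with a concrete JoinHint and asserts the driver strategy the optimizer chose. A hedged sketch of such a call (the exact hint-to-strategy assertions in JoinTranslationTest may differ):

// Hedged sketch: with a broadcast-first hint, the optimizer is expected to
// build the hash table on the first (broadcast) input.
DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.BROADCAST_HASH_FIRST);
Assert.assertEquals(DriverStrategy.HYBRID_HASH_BUILD_FIRST, node.getDriverStrategy());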