Usage example of org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint in the Apache Flink project, taken from the class OverwriteObjects, method testJoin:
// --------------------------------------------------------------------------------------------
/**
 * Verifies that enabling object reuse does not change join results.
 *
 * <p>For every concrete {@link JoinHint} (OPTIMIZER_CHOOSES is skipped because it only
 * delegates to one of the concrete strategies tested here), runs an inner, left outer,
 * right outer, and full outer join twice — once with object reuse enabled and once
 * disabled — and asserts that both runs produce the same sorted result. The broadcast-hash
 * hints are skipped for the outer-join sides they do not apply to (presumably because
 * Flink does not support broadcasting the null-producing side of an outer join —
 * TODO confirm against the optimizer's supported-strategy matrix).
 *
 * @param env the execution environment to run the test jobs on
 * @throws Exception if executing a Flink job fails
 */
public void testJoin(ExecutionEnvironment env) throws Exception {
    for (JoinHint joinHint : JoinHint.values()) {
        if (joinHint == JoinHint.OPTIMIZER_CHOOSES) {
            continue;
        }

        // Inner join
        LOG.info("Testing inner join with JoinHint = {}", joinHint);
        assertSameResultWithAndWithoutObjectReuse(env, joinHint,
            () -> getDataSet(env).join(getDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect());

        if (joinHint != JoinHint.BROADCAST_HASH_FIRST) {
            LOG.info("Testing left outer join with JoinHint = {}", joinHint);
            assertSameResultWithAndWithoutObjectReuse(env, joinHint,
                () -> getDataSet(env).leftOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect());
        }

        if (joinHint != JoinHint.BROADCAST_HASH_SECOND) {
            LOG.info("Testing right outer join with JoinHint = {}", joinHint);
            assertSameResultWithAndWithoutObjectReuse(env, joinHint,
                () -> getDataSet(env).rightOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect());
        }

        if (joinHint != JoinHint.BROADCAST_HASH_FIRST && joinHint != JoinHint.BROADCAST_HASH_SECOND) {
            LOG.info("Testing full outer join with JoinHint = {}", joinHint);
            assertSameResultWithAndWithoutObjectReuse(env, joinHint,
                () -> getDataSet(env).fullOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect());
        }
    }
}

/**
 * Runs {@code job} twice — first with object reuse enabled, then disabled — sorts both
 * result lists with {@code comparator}, and asserts that they are equal.
 *
 * <p>Replaces the four copy-pasted enable/run/sort/disable/run/sort/assert stanzas of the
 * original and uses {@code assertThat} uniformly (the original mixed {@code assertEquals}
 * for the inner join with {@code assertThat} for the outer joins).
 *
 * @param env the execution environment whose object-reuse flag is toggled
 * @param joinHint the hint under test, included in the assertion message
 * @param job the join job to execute; {@code Callable} is used (fully qualified, to avoid
 *     touching the file's imports) because {@code collect()} throws a checked exception
 * @throws Exception if executing the job fails
 */
private void assertSameResultWithAndWithoutObjectReuse(
        ExecutionEnvironment env,
        JoinHint joinHint,
        java.util.concurrent.Callable<List<Tuple2<IntValue, IntValue>>> job) throws Exception {
    env.getConfig().enableObjectReuse();
    List<Tuple2<IntValue, IntValue>> enabledResult = job.call();
    Collections.sort(enabledResult, comparator);

    env.getConfig().disableObjectReuse();
    List<Tuple2<IntValue, IntValue>> disabledResult = job.call();
    Collections.sort(disabledResult, comparator);

    Assert.assertThat("JoinHint=" + joinHint, disabledResult, is(enabledResult));
}
Usage example of org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint in the Apache Flink project, taken from the class OuterJoinNode, method getDataProperties:
/**
 * Builds the list of candidate execution-strategy descriptors for this outer join.
 *
 * <p>The descriptors are selected by the operator's outer-join type (LEFT, RIGHT, or FULL),
 * honoring the user-supplied {@link JoinHint} (defaulting to OPTIMIZER_CHOOSES when none
 * was given). If a custom partitioner is configured on the operator, it is installed on
 * every descriptor before the list is returned.
 *
 * @return the candidate operator descriptors for this join
 * @throws CompilerException if the operator reports an unrecognized outer-join type
 */
private List<OperatorDescriptorDual> getDataProperties() {
    final OuterJoinOperatorBase<?, ?, ?, ?> joinOperator = getOperator();

    // fall back to letting the optimizer pick a strategy when no hint was supplied
    JoinHint hint = joinOperator.getJoinHint();
    if (hint == null) {
        hint = JoinHint.OPTIMIZER_CHOOSES;
    }

    final OuterJoinType type = joinOperator.getOuterJoinType();
    final List<OperatorDescriptorDual> descriptors;
    switch (type) {
        case LEFT:
            descriptors = createLeftOuterJoinDescriptors(hint);
            break;
        case RIGHT:
            descriptors = createRightOuterJoinDescriptors(hint);
            break;
        case FULL:
            descriptors = createFullOuterJoinDescriptors(hint);
            break;
        default:
            throw new CompilerException("Unknown outer join type: " + type);
    }

    // propagate any user-provided partitioner to every candidate strategy
    final Partitioner<?> partitioner = joinOperator.getCustomPartitioner();
    if (partitioner != null) {
        for (OperatorDescriptorDual descriptor : descriptors) {
            ((AbstractJoinDescriptor) descriptor).setCustomPartitioner(partitioner);
        }
    }

    return descriptors;
}
Usage example of org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint in the Apache Flink project, taken from the class MaximumDegree, method runInternal:
/*
 * Implementation notes:
 *
 * The three leftOuterJoin below could be implemented more efficiently
 * as an anti-join when available in Flink.
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input) throws Exception {
    // compute d(u) for every vertex u
    DataSet<Vertex<K, LongValue>> degrees = input
        .run(new VertexDegree<K, VV, EV>()
            .setReduceOnTargetId(reduceOnTargetId.get())
            .setParallelism(parallelism));

    // keep only the IDs of vertices with d(u) > maximumDegree
    DataSet<Tuple1<K>> verticesOverLimit = degrees
        .flatMap(new DegreeFilter<K>(maximumDegree))
        .setParallelism(parallelism)
        .name("Filter high-degree vertices");

    // the caller may request broadcasting the high-degree set instead of repartitioning
    JoinHint hint = broadcastHighDegreeVertices.get()
        ? JoinHint.BROADCAST_HASH_SECOND
        : JoinHint.REPARTITION_HASH_SECOND;

    // remove every high-degree vertex from the vertex set
    DataSet<Vertex<K, VV>> prunedVertices = input.getVertices()
        .leftOuterJoin(verticesOverLimit, hint)
        .where(0)
        .equalTo(0)
        .with(new ProjectVertex<K, VV>())
        .setParallelism(parallelism)
        .name("Project low-degree vertices");

    // first pass: drop edges whose endpoint chosen by reduceOnTargetId is high-degree
    DataSet<Edge<K, EV>> partiallyPrunedEdges = input.getEdges()
        .leftOuterJoin(verticesOverLimit, hint)
        .where(reduceOnTargetId.get() ? 1 : 0)
        .equalTo(0)
        .with(new ProjectEdge<K, EV>())
        .setParallelism(parallelism)
        .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "target" : "source"));

    // second pass: drop edges whose remaining endpoint is high-degree
    DataSet<Edge<K, EV>> prunedEdges = partiallyPrunedEdges
        .leftOuterJoin(verticesOverLimit, hint)
        .where(reduceOnTargetId.get() ? 0 : 1)
        .equalTo(0)
        .with(new ProjectEdge<K, EV>())
        .setParallelism(parallelism)
        .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "source" : "target"));

    return Graph.fromDataSet(prunedVertices, prunedEdges, input.getContext());
}
Aggregations