Example 1 with JoinHint

Use of org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint in the Apache Flink project.

From the class OverwriteObjects, the method testJoin:

// --------------------------------------------------------------------------------------------
public void testJoin(ExecutionEnvironment env) throws Exception {
    for (JoinHint joinHint : JoinHint.values()) {
        if (joinHint == JoinHint.OPTIMIZER_CHOOSES) {
            continue;
        }
        List<Tuple2<IntValue, IntValue>> enabledResult;
        List<Tuple2<IntValue, IntValue>> disabledResult;
        // Inner join
        LOG.info("Testing inner join with JoinHint = {}", joinHint);
        env.getConfig().enableObjectReuse();
        enabledResult = getDataSet(env).join(getDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
        Collections.sort(enabledResult, comparator);
        env.getConfig().disableObjectReuse();
        disabledResult = getDataSet(env).join(getDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
        Collections.sort(disabledResult, comparator);
        Assert.assertEquals("JoinHint=" + joinHint, disabledResult, enabledResult);
        if (joinHint != JoinHint.BROADCAST_HASH_FIRST) {
            LOG.info("Testing left outer join with JoinHint = {}", joinHint);
            env.getConfig().enableObjectReuse();
            enabledResult = getDataSet(env).leftOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
            Collections.sort(enabledResult, comparator);
            env.getConfig().disableObjectReuse();
            disabledResult = getDataSet(env).leftOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
            Collections.sort(disabledResult, comparator);
            Assert.assertThat("JoinHint=" + joinHint, disabledResult, is(enabledResult));
        }
        if (joinHint != JoinHint.BROADCAST_HASH_SECOND) {
            LOG.info("Testing right outer join with JoinHint = {}", joinHint);
            env.getConfig().enableObjectReuse();
            enabledResult = getDataSet(env).rightOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
            Collections.sort(enabledResult, comparator);
            env.getConfig().disableObjectReuse();
            disabledResult = getDataSet(env).rightOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
            Collections.sort(disabledResult, comparator);
            Assert.assertThat("JoinHint=" + joinHint, disabledResult, is(enabledResult));
        }
        if (joinHint != JoinHint.BROADCAST_HASH_FIRST && joinHint != JoinHint.BROADCAST_HASH_SECOND) {
            LOG.info("Testing full outer join with JoinHint = {}", joinHint);
            env.getConfig().enableObjectReuse();
            enabledResult = getDataSet(env).fullOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
            Collections.sort(enabledResult, comparator);
            env.getConfig().disableObjectReuse();
            disabledResult = getDataSet(env).fullOuterJoin(getFilteredDataSet(env), joinHint).where(0).equalTo(0).with(new OverwriteObjectsJoin()).collect();
            Collections.sort(disabledResult, comparator);
            Assert.assertThat("JoinHint=" + joinHint, disabledResult, is(enabledResult));
        }
    }
}
Also used: JoinHint (org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint), Tuple2 (org.apache.flink.api.java.tuple.Tuple2)
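
The snippet above depends on helpers that this page does not show: getDataSet, getFilteredDataSet, the comparator used for sorting, and OverwriteObjectsJoin. The following is a minimal sketch of plausible implementations, assuming Tuple2<IntValue, IntValue> records; the names match the snippet, but the bodies are assumptions for illustration and may differ from the actual Flink test class.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Random;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.types.IntValue;

// assumed number of test records
private static final int NUMBER_OF_ELEMENTS = 1000;

// assumed sort order: by key field, then by value field
private final Comparator<Tuple2<IntValue, IntValue>> comparator =
        Comparator.comparing((Tuple2<IntValue, IntValue> t) -> t.f0.getValue())
                .thenComparing(t -> t.f1.getValue());

private DataSet<Tuple2<IntValue, IntValue>> getDataSet(ExecutionEnvironment env) {
    List<Tuple2<IntValue, IntValue>> data = new ArrayList<>();
    Random random = new Random(42);
    for (int i = 0; i < NUMBER_OF_ELEMENTS; i++) {
        data.add(new Tuple2<>(new IntValue(random.nextInt(100)), new IntValue(i)));
    }
    return env.fromCollection(data);
}

private DataSet<Tuple2<IntValue, IntValue>> getFilteredDataSet(ExecutionEnvironment env) {
    // keep only even keys so the outer joins also see unmatched records
    return getDataSet(env).filter(value -> value.f0.getValue() % 2 == 0);
}

// A join function that mutates and reuses a single output object. Whether that is
// observable depends on how the runtime copies records, which is what the test
// compares with object reuse enabled versus disabled.
private static class OverwriteObjectsJoin implements
        JoinFunction<Tuple2<IntValue, IntValue>, Tuple2<IntValue, IntValue>, Tuple2<IntValue, IntValue>> {

    private final Tuple2<IntValue, IntValue> reused =
            new Tuple2<>(new IntValue(), new IntValue());

    @Override
    public Tuple2<IntValue, IntValue> join(Tuple2<IntValue, IntValue> first,
            Tuple2<IntValue, IntValue> second) {
        // the outer join variants pass null for the side without a match
        Tuple2<IntValue, IntValue> source = first != null ? first : second;
        reused.f0.setValue(source.f0.getValue());
        reused.f1.setValue(source.f1.getValue());
        return reused;
    }
}

The idea behind reusing a single output object is that the collected results only agree with and without object reuse if each join strategy selected by the JoinHint handles mutated records correctly.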

Example 2 with JoinHint

Use of org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint in the Apache Flink project.

From the class OuterJoinNode, the method getDataProperties:

private List<OperatorDescriptorDual> getDataProperties() {
    OuterJoinOperatorBase<?, ?, ?, ?> operator = getOperator();
    OuterJoinType type = operator.getOuterJoinType();
    JoinHint joinHint = operator.getJoinHint();
    joinHint = joinHint == null ? JoinHint.OPTIMIZER_CHOOSES : joinHint;
    List<OperatorDescriptorDual> list;
    switch(type) {
        case LEFT:
            list = createLeftOuterJoinDescriptors(joinHint);
            break;
        case RIGHT:
            list = createRightOuterJoinDescriptors(joinHint);
            break;
        case FULL:
            list = createFullOuterJoinDescriptors(joinHint);
            break;
        default:
            throw new CompilerException("Unknown outer join type: " + type);
    }
    Partitioner<?> customPartitioner = operator.getCustomPartitioner();
    if (customPartitioner != null) {
        for (OperatorDescriptorDual desc : list) {
            ((AbstractJoinDescriptor) desc).setCustomPartitioner(customPartitioner);
        }
    }
    return list;
}
Also used: JoinHint (org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint), CompilerException (org.apache.flink.optimizer.CompilerException), OperatorDescriptorDual (org.apache.flink.optimizer.operators.OperatorDescriptorDual), AbstractJoinDescriptor (org.apache.flink.optimizer.operators.AbstractJoinDescriptor), OuterJoinType (org.apache.flink.api.common.operators.base.OuterJoinOperatorBase.OuterJoinType)
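
For context, the JoinHint read by getDataProperties() is whatever the user attached when building the outer join in the DataSet API; if none was given, the code above falls back to OPTIMIZER_CHOOSES. A minimal usage sketch follows (data types and values are made up for illustration):

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, String>> names =
            env.fromElements(new Tuple2<>(1L, "a"), new Tuple2<>(2L, "b"));
    DataSet<Tuple2<Long, Integer>> scores =
            env.fromElements(new Tuple2<>(1L, 10));

    // Passing the hint here is what makes operator.getJoinHint() return a non-null
    // value in getDataProperties(); without an explicit hint the optimizer uses
    // JoinHint.OPTIMIZER_CHOOSES, as shown above.
    DataSet<Tuple2<String, Integer>> joined = names
            .leftOuterJoin(scores, JoinHint.REPARTITION_SORT_MERGE)
            .where(0)
            .equalTo(0)
            .with(new JoinFunction<Tuple2<Long, String>, Tuple2<Long, Integer>, Tuple2<String, Integer>>() {
                @Override
                public Tuple2<String, Integer> join(Tuple2<Long, String> name, Tuple2<Long, Integer> score) {
                    // score is null for names without a matching entry
                    return new Tuple2<>(name.f1, score == null ? 0 : score.f1);
                }
            });

    joined.print();
}

REPARTITION_SORT_MERGE is chosen here because sort-merge supports every outer join type; broadcast hints are only valid when the broadcast side is the one that cannot produce null-padded rows, which is why the test in Example 1 skips them for certain outer joins.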

Example 3 with JoinHint

Use of org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint in the Apache Flink project.

From the class MaximumDegree, the method runInternal:

/*
 * Implementation notes:
 *
 * The three leftOuterJoins below could be implemented more efficiently
 * as anti-joins, when such an operator becomes available in Flink.
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input) throws Exception {
    // u, d(u)
    DataSet<Vertex<K, LongValue>> vertexDegree = input
        .run(new VertexDegree<K, VV, EV>()
            .setReduceOnTargetId(reduceOnTargetId.get())
            .setParallelism(parallelism));
    // u, d(u) if d(u) > maximumDegree
    DataSet<Tuple1<K>> highDegreeVertices = vertexDegree
        .flatMap(new DegreeFilter<K>(maximumDegree))
        .setParallelism(parallelism)
        .name("Filter high-degree vertices");
    JoinHint joinHint = broadcastHighDegreeVertices.get()
        ? JoinHint.BROADCAST_HASH_SECOND
        : JoinHint.REPARTITION_HASH_SECOND;
    // Vertices
    DataSet<Vertex<K, VV>> vertices = input.getVertices()
        .leftOuterJoin(highDegreeVertices, joinHint)
        .where(0).equalTo(0)
        .with(new ProjectVertex<K, VV>())
        .setParallelism(parallelism)
        .name("Project low-degree vertices");
    // Edges
    DataSet<Edge<K, EV>> edges = input.getEdges()
        .leftOuterJoin(highDegreeVertices, joinHint)
        .where(reduceOnTargetId.get() ? 1 : 0).equalTo(0)
        .with(new ProjectEdge<K, EV>())
        .setParallelism(parallelism)
        .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "target" : "source"))
        .leftOuterJoin(highDegreeVertices, joinHint)
        .where(reduceOnTargetId.get() ? 0 : 1).equalTo(0)
        .with(new ProjectEdge<K, EV>())
        .setParallelism(parallelism)
        .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "source" : "target"));
    // Graph
    return Graph.fromDataSet(vertices, edges, input.getContext());
}
Also used: Vertex (org.apache.flink.graph.Vertex), Tuple1 (org.apache.flink.api.java.tuple.Tuple1), JoinHint (org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint), Edge (org.apache.flink.graph.Edge)
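
The left outer joins above act as anti-joins, which is what the implementation note alludes to: a vertex or edge is kept only if it finds no match in highDegreeVertices. ProjectVertex and ProjectEdge are not shown on this page; the sketch below is an assumed illustration of that pattern rather than the original Gelly code.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.graph.Vertex;
import org.apache.flink.util.Collector;

// Hypothetical, simplified stand-in for ProjectVertex: forward the vertex only when
// the outer join found no matching entry in the high-degree set. ProjectEdge follows
// the same pattern on the chosen edge endpoint. The real implementations may differ
// in detail (e.g. forwarded-field annotations).
private static class ProjectVertex<K, VV>
        implements FlatJoinFunction<Vertex<K, VV>, Tuple1<K>, Vertex<K, VV>> {

    @Override
    public void join(Vertex<K, VV> vertex, Tuple1<K> highDegreeVertex, Collector<Vertex<K, VV>> out) {
        if (highDegreeVertex == null) {
            // no match: the vertex degree is within the limit, so keep it
            out.collect(vertex);
        }
        // a non-null match means d(u) > maximumDegree, so the vertex is dropped
    }
}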

Aggregations

JoinHint (org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint): 3 usages
OuterJoinType (org.apache.flink.api.common.operators.base.OuterJoinOperatorBase.OuterJoinType): 1 usage
Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 1 usage
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1 usage
Edge (org.apache.flink.graph.Edge): 1 usage
Vertex (org.apache.flink.graph.Vertex): 1 usage
CompilerException (org.apache.flink.optimizer.CompilerException): 1 usage
AbstractJoinDescriptor (org.apache.flink.optimizer.operators.AbstractJoinDescriptor): 1 usage
OperatorDescriptorDual (org.apache.flink.optimizer.operators.OperatorDescriptorDual): 1 usage