Search in sources:

Example 1 with InnerJoinOperatorBase

Use of org.apache.flink.api.common.operators.base.InnerJoinOperatorBase in the Apache Flink project.

From the class NamesTest, method testJoinWith.

@Test
public void testJoinWith() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    List<Tuple1<String>> strLi = new ArrayList<Tuple1<String>>();
    strLi.add(new Tuple1<String>("a"));
    strLi.add(new Tuple1<String>("b"));
    DataSet<Tuple1<String>> strs = env.fromCollection(strLi);
    DataSet<Tuple1<String>> strs1 = env.fromCollection(strLi);
    strs.join(strs1).where(0).equalTo(0).with(new FlatJoinFunction<Tuple1<String>, Tuple1<String>, String>() {

        @Override
        public void join(Tuple1<String> first, Tuple1<String> second, Collector<String> out) throws Exception {
            // Intentionally empty: this method only builds and inspects the plan.
        }
    }).output(new DiscardingOutputFormat<String>());
    // Purpose: build a join with a user-defined FlatJoinFunction and check the name that the
    // translated join operator carries in the program plan.
    //
    // NOTE(review): the expected name below ends in "NamesTest.java:93", which appears to be the
    // source line of the join call above. The explanatory comments are therefore placed after
    // that call so that its line position within the file does not move; adding or removing
    // lines above it presumably requires updating the expected literal - confirm.
    Plan plan = env.createProgramPlan();
    // Single-element array so the anonymous visitor can record that it actually saw a join.
    final boolean[] joinVisited = new boolean[1];
    plan.accept(new Visitor<Operator<?>>() {

        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof InnerJoinOperatorBase) {
                joinVisited[0] = true;
                Assert.assertEquals("Join at testJoinWith(NamesTest.java:93)", visitable.getName());
            }
            // Returning true for every operator, as the original visitor did.
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {
            // Nothing to check after visiting an operator.
        }
    });
    // Without this check the test would pass silently if the plan contained no join operator,
    // because the name assertion above would never run.
    Assert.assertTrue("The plan contains no InnerJoinOperatorBase, so the join name was never checked", joinVisited[0]);
}
Also used : Operator(org.apache.flink.api.common.operators.Operator) PlanFilterOperator(org.apache.flink.api.java.operators.translation.PlanFilterOperator) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ArrayList(java.util.ArrayList) FlatJoinFunction(org.apache.flink.api.common.functions.FlatJoinFunction) Plan(org.apache.flink.api.common.Plan) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Collector(org.apache.flink.util.Collector) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Test(org.junit.Test)

Example 2 with InnerJoinOperatorBase

Use of org.apache.flink.api.common.operators.base.InnerJoinOperatorBase in the Apache Flink project.

From the class DeltaIterationTranslationTest, method testCorrectTranslation.

/**
 * Builds a delta-iteration program with two sinks and verifies the translated {@code Plan}:
 * job name, default parallelism, the iteration's basic properties, the operators feeding the
 * solution set delta and the next workset, and the registered aggregator name.
 *
 * <p>Any unexpected exception fails the test with the original exception attached as the cause,
 * so the test report shows the full stack trace.
 */
@Test
public void testCorrectTranslation() {
    try {
        final String JOB_NAME = "Test JobName";
        final String ITERATION_NAME = "Test Name";
        final String BEFORE_NEXT_WORKSET_MAP = "Some Mapper";
        final String AGGREGATOR_NAME = "AggregatorName";
        final int[] ITERATION_KEYS = new int[] { 2 };
        final int NUM_ITERATIONS = 13;
        final int DEFAULT_PARALLELISM = 133;
        final int ITERATION_PARALLELISM = 77;
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // ------------ construct the test program ------------------
        {
            env.setParallelism(DEFAULT_PARALLELISM);
            @SuppressWarnings("unchecked")
            DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
            @SuppressWarnings("unchecked")
            DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
            DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, NUM_ITERATIONS, ITERATION_KEYS);
            iteration.name(ITERATION_NAME).parallelism(ITERATION_PARALLELISM);
            iteration.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
            // test that multiple workset consumers are supported
            DataSet<Tuple2<Double, String>> worksetSelfJoin = iteration.getWorkset().map(new IdentityMapper<Tuple2<Double, String>>()).join(iteration.getWorkset()).where(1).equalTo(1).projectFirst(0, 1);
            DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin.join(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetJoin());
            DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(joined, joined.map(new NextWorksetMapper()).name(BEFORE_NEXT_WORKSET_MAP));
            // Two sinks on the same result, so the iteration has more than one consumer.
            result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
            result.writeAsText("/dev/null");
        }
        Plan p = env.createProgramPlan(JOB_NAME);
        // ------------- validate the plan ----------------
        assertEquals(JOB_NAME, p.getJobName());
        assertEquals(DEFAULT_PARALLELISM, p.getDefaultParallelism());
        // validate the iteration
        GenericDataSinkBase<?> sink1, sink2;
        {
            Iterator<? extends GenericDataSinkBase<?>> sinks = p.getDataSinks().iterator();
            sink1 = sinks.next();
            sink2 = sinks.next();
        }
        DeltaIterationBase<?, ?> iteration = (DeltaIterationBase<?, ?>) sink1.getInput();
        // check that multi consumer translation works for iterations
        assertEquals(iteration, sink2.getInput());
        // check the basic iteration properties
        assertEquals(NUM_ITERATIONS, iteration.getMaximumNumberOfIterations());
        assertArrayEquals(ITERATION_KEYS, iteration.getSolutionSetKeyFields());
        assertEquals(ITERATION_PARALLELISM, iteration.getParallelism());
        assertEquals(ITERATION_NAME, iteration.getName());
        // walk backwards from the iteration's outputs to the operators defined above
        MapOperatorBase<?, ?, ?> nextWorksetMapper = (MapOperatorBase<?, ?, ?>) iteration.getNextWorkset();
        InnerJoinOperatorBase<?, ?, ?, ?> solutionSetJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) iteration.getSolutionSetDelta();
        InnerJoinOperatorBase<?, ?, ?, ?> worksetSelfJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) solutionSetJoin.getFirstInput();
        MapOperatorBase<?, ?, ?> worksetMapper = (MapOperatorBase<?, ?, ?>) worksetSelfJoin.getFirstInput();
        assertEquals(IdentityMapper.class, worksetMapper.getUserCodeWrapper().getUserCodeClass());
        assertEquals(NextWorksetMapper.class, nextWorksetMapper.getUserCodeWrapper().getUserCodeClass());
        // the user function of the solution set join may be wrapped; check whichever form is present
        if (solutionSetJoin.getUserCodeWrapper().getUserCodeObject() instanceof WrappingFunction) {
            WrappingFunction<?> wf = (WrappingFunction<?>) solutionSetJoin.getUserCodeWrapper().getUserCodeObject();
            assertEquals(SolutionWorksetJoin.class, wf.getWrappedFunction().getClass());
        } else {
            assertEquals(SolutionWorksetJoin.class, solutionSetJoin.getUserCodeWrapper().getUserCodeClass());
        }
        assertEquals(BEFORE_NEXT_WORKSET_MAP, nextWorksetMapper.getName());
        assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
    } catch (Exception e) {
        // Fail the test, but keep the original exception as the cause instead of reducing it to
        // its (possibly null) message. initCause is used so this compiles on older Java versions.
        AssertionError failure = new AssertionError("Unexpected exception during translation test: " + e);
        failure.initCause(e);
        throw failure;
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) DataSet(org.apache.flink.api.java.DataSet) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) Iterator(java.util.Iterator) DeltaIterationBase(org.apache.flink.api.common.operators.base.DeltaIterationBase) DeltaIteration(org.apache.flink.api.java.operators.DeltaIteration) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Plan(org.apache.flink.api.common.Plan) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 3 with InnerJoinOperatorBase

Use of org.apache.flink.api.common.operators.base.InnerJoinOperatorBase in the Apache Flink project.

From the class SemanticPropertiesTranslationTest, method testBinaryForwardedInLine2.

/**
 * Translates a join whose forwarded fields are declared in line with
 * {@code withForwardedFieldsFirst("0->1; 1->2")} and {@code withForwardedFieldsSecond("1->0")},
 * then checks the forwarding targets and read fields reported by the translated join operator.
 */
@Test
public void testBinaryForwardedInLine2() {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> firstInput = environment.fromElements(new Tuple2<Long, Long>(3L, 4L));
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> secondInput = environment.fromElements(new Tuple2<Long, Long>(3L, 2L));

    firstInput.join(secondInput)
            .where(0)
            .equalTo(0)
            .with(new ReadSetJoin<Long>())
            .withForwardedFieldsFirst("0->1; 1->2")
            .withForwardedFieldsSecond("1->0")
            .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    // The join is the direct input of the only sink in the plan.
    Plan programPlan = environment.createProgramPlan();
    GenericDataSinkBase<?> dataSink = programPlan.getDataSinks().iterator().next();
    InnerJoinOperatorBase<?, ?, ?, ?> joinOperator = (InnerJoinOperatorBase<?, ?, ?, ?>) dataSink.getInput();
    DualInputSemanticProperties props = joinOperator.getSemanticProperties();

    // Forwarding targets, queried as (input index, source field).
    assertNotNull(props.getForwardingTargetFields(1, 0));
    assertEquals(1, props.getForwardingTargetFields(0, 0).size());
    assertEquals(1, props.getForwardingTargetFields(0, 1).size());
    assertEquals(1, props.getForwardingTargetFields(1, 1).size());
    assertTrue(props.getForwardingTargetFields(0, 0).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 1).contains(2));
    assertTrue(props.getForwardingTargetFields(1, 1).contains(0));

    // Read fields reported for each of the two inputs.
    assertNotNull(props.getReadFields(0));
    assertNotNull(props.getReadFields(1));
    assertEquals(1, props.getReadFields(0).size());
    assertEquals(1, props.getReadFields(1).size());
    assertTrue(props.getReadFields(0).contains(1));
    assertTrue(props.getReadFields(1).contains(0));

    // Field 0 of the second input has no forwarding target.
    assertEquals(0, props.getForwardingTargetFields(1, 0).size());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Test(org.junit.Test)

Example 4 with InnerJoinOperatorBase

Use of org.apache.flink.api.common.operators.base.InnerJoinOperatorBase in the Apache Flink project.

From the class SemanticPropertiesTranslationTest, method testBinaryForwardedAnnotationInLineMixed1.

/**
 * Translates a join that uses {@code ForwardedFirstAnnotationJoin} together with an in-line
 * {@code withForwardedFieldsSecond("1")} declaration, then checks the forwarding targets
 * reported by the translated join operator for fields 0 and 1 of both inputs.
 */
@Test
public void testBinaryForwardedAnnotationInLineMixed1() {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> firstInput = environment.fromElements(new Tuple2<Long, Long>(3L, 4L));
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> secondInput = environment.fromElements(new Tuple2<Long, Long>(3L, 2L));

    firstInput.join(secondInput)
            .where(0)
            .equalTo(0)
            .with(new ForwardedFirstAnnotationJoin<Long>())
            .withForwardedFieldsSecond("1")
            .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    // The join is the direct input of the only sink in the plan.
    Plan programPlan = environment.createProgramPlan();
    GenericDataSinkBase<?> dataSink = programPlan.getDataSinks().iterator().next();
    InnerJoinOperatorBase<?, ?, ?, ?> joinOperator = (InnerJoinOperatorBase<?, ?, ?, ?>) dataSink.getInput();
    DualInputSemanticProperties props = joinOperator.getSemanticProperties();

    // Every queried (input index, source field) pair must yield a non-null target set.
    assertNotNull(props.getForwardingTargetFields(0, 1));
    assertNotNull(props.getForwardingTargetFields(1, 0));
    assertNotNull(props.getForwardingTargetFields(0, 0));
    assertNotNull(props.getForwardingTargetFields(1, 1));

    // First input: field 0 -> 2 (presumably declared by the function's annotation - confirm).
    // Second input: field 1 -> 1, matching the in-line "1" declaration above.
    assertEquals(1, props.getForwardingTargetFields(0, 0).size());
    assertEquals(1, props.getForwardingTargetFields(1, 1).size());
    assertTrue(props.getForwardingTargetFields(0, 0).contains(2));
    assertTrue(props.getForwardingTargetFields(1, 1).contains(1));

    // The remaining fields have no forwarding target.
    assertEquals(0, props.getForwardingTargetFields(0, 1).size());
    assertEquals(0, props.getForwardingTargetFields(1, 0).size());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Test(org.junit.Test)

Example 5 with InnerJoinOperatorBase

Use of org.apache.flink.api.common.operators.base.InnerJoinOperatorBase in the Apache Flink project.

From the class SemanticPropertiesTranslationTest, method testBinaryAllForwardedExceptAnnotation.

/**
 * Translates a join that uses {@code AllForwardedExceptJoin} with no in-line forwarding
 * declarations, then checks the forwarding targets reported by the translated join operator
 * for all three fields of both inputs.
 */
@Test
public void testBinaryAllForwardedExceptAnnotation() {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> firstInput = environment.fromElements(new Tuple3<Long, Long, Long>(3L, 4L, 5L));
    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> secondInput = environment.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));

    firstInput.join(secondInput)
            .where(0)
            .equalTo(0)
            .with(new AllForwardedExceptJoin<Long>())
            .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    // The join is the direct input of the only sink in the plan.
    Plan programPlan = environment.createProgramPlan();
    GenericDataSinkBase<?> dataSink = programPlan.getDataSinks().iterator().next();
    InnerJoinOperatorBase<?, ?, ?, ?> joinOperator = (InnerJoinOperatorBase<?, ?, ?, ?>) dataSink.getInput();
    DualInputSemanticProperties props = joinOperator.getSemanticProperties();

    // Fields expected to have no target must still yield a non-null set.
    assertNotNull(props.getForwardingTargetFields(0, 0));
    assertNotNull(props.getForwardingTargetFields(0, 2));
    assertNotNull(props.getForwardingTargetFields(1, 0));
    assertNotNull(props.getForwardingTargetFields(1, 1));

    // Exactly one field per input is forwarded, each to the same position:
    // field 1 of the first input and field 2 of the second input.
    assertEquals(1, props.getForwardingTargetFields(0, 1).size());
    assertEquals(1, props.getForwardingTargetFields(1, 2).size());
    assertTrue(props.getForwardingTargetFields(0, 1).contains(1));
    assertTrue(props.getForwardingTargetFields(1, 2).contains(2));

    // All other fields have no forwarding target.
    assertEquals(0, props.getForwardingTargetFields(0, 0).size());
    assertEquals(0, props.getForwardingTargetFields(0, 2).size());
    assertEquals(0, props.getForwardingTargetFields(1, 0).size());
    assertEquals(0, props.getForwardingTargetFields(1, 1).size());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties) Tuple3(org.apache.flink.api.java.tuple.Tuple3) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Test(org.junit.Test)

Aggregations

InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase): 11; Plan (org.apache.flink.api.common.Plan): 10; ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 10; Test (org.junit.Test): 10; Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 8; DualInputSemanticProperties (org.apache.flink.api.common.operators.DualInputSemanticProperties): 7; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 7; InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 2; GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase): 2; DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase): 2; MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase): 2; ArrayList (java.util.ArrayList): 1; Iterator (java.util.Iterator): 1; LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator): 1; FlatJoinFunction (org.apache.flink.api.common.functions.FlatJoinFunction): 1; GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase): 1; Operator (org.apache.flink.api.common.operators.Operator): 1; Union (org.apache.flink.api.common.operators.Union): 1; BulkIterationBase (org.apache.flink.api.common.operators.base.BulkIterationBase): 1; CoGroupOperatorBase (org.apache.flink.api.common.operators.base.CoGroupOperatorBase): 1