Search in sources :

Example 16 with TupleTypeInfo

use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

the class ReduceTranslationTests method translateGroupedReduceWithkeyExtractor.

/**
 * Builds a grouped reduce that uses a {@link KeySelector}, then inspects the generated plan.
 *
 * <p>The test expects the plan to be sink &lt;- key projector (map) &lt;- reducer &lt;- key
 * extractor (map) &lt;- source, and checks the parallelism and the input/output types of each of
 * the three operators between source and sink.
 */
@Test
public void translateGroupedReduceWithkeyExtractor() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

        // Key on the second tuple field.
        KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue> secondFieldKey = new KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue>() {

            @Override
            public StringValue getKey(Tuple3<Double, StringValue, LongValue> record) {
                return record.f1;
            }
        };

        // The reduce logic is irrelevant for plan translation; it simply keeps the first value.
        RichReduceFunction<Tuple3<Double, StringValue, LongValue>> keepFirst = new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {

            @Override
            public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> left, Tuple3<Double, StringValue, LongValue> right) {
                return left;
            }
        };

        initialData
                .groupBy(secondFieldKey)
                .reduce(keepFirst)
                .setParallelism(4)
                .output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

        // Walk the plan backwards, starting at the single data sink.
        Plan plan = env.createProgramPlan();
        GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();
        MapOperatorBase<?, ?, ?> keyProjector = (MapOperatorBase<?, ?, ?>) dataSink.getInput();
        PlanUnwrappingReduceOperator<?, ?> reducer = (PlanUnwrappingReduceOperator<?, ?>) keyProjector.getInput();
        MapOperatorBase<?, ?, ?> keyExtractor = (MapOperatorBase<?, ?, ?>) reducer.getInput();

        // check the parallelisms
        assertEquals(1, keyExtractor.getParallelism());
        assertEquals(4, reducer.getParallelism());
        assertEquals(4, keyProjector.getParallelism());

        // check types: between extractor and projector the records travel as (key, record) pairs
        TypeInformation<Tuple3<Double, StringValue, LongValue>> recordType = initialData.getType();
        TypeInformation<?> keyedRecordType = new TupleTypeInfo<Tuple2<StringValue, Tuple3<Double, StringValue, LongValue>>>(new ValueTypeInfo<StringValue>(StringValue.class), recordType);
        assertEquals(recordType, keyExtractor.getOperatorInfo().getInputType());
        assertEquals(keyedRecordType, keyExtractor.getOperatorInfo().getOutputType());
        assertEquals(keyedRecordType, reducer.getOperatorInfo().getInputType());
        assertEquals(keyedRecordType, reducer.getOperatorInfo().getOutputType());
        assertEquals(keyedRecordType, keyProjector.getOperatorInfo().getInputType());
        assertEquals(recordType, keyProjector.getOperatorInfo().getOutputType());

        // check keys
        assertEquals(KeyExtractingMapper.class, keyExtractor.getUserCodeWrapper().getUserCodeClass());
        assertTrue(keyExtractor.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) Plan(org.apache.flink.api.common.Plan) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LongValue(org.apache.flink.types.LongValue) StringValue(org.apache.flink.types.StringValue) Test(org.junit.Test)

Example 17 with TupleTypeInfo

use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

the class ExternalSortLargeRecordsITCase method testSortWithLongAndShortRecordsMixed.

/**
 * Sorts {@code NUM_RECORDS} generated {@code (Long, SomeMaybeLongValue)} tuples on field 0 and
 * verifies the sorter's output.
 *
 * <p>Every {@code LARGE_REC_INTERVAL}-th record is created with the boolean constructor flag set
 * (presumably producing a large payload; confirm against {@code SomeMaybeLongValue}). The
 * checks are: keys arrive in non-increasing order, each payload still matches the int-truncated
 * key (i.e. it survived serialization), and exactly {@code NUM_RECORDS} records are returned.
 */
@Test
public void testSortWithLongAndShortRecordsMixed() {
    try {
        final int NUM_RECORDS = 1000000;
        final int LARGE_REC_INTERVAL = 100000;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeMaybeLongValue>(SomeMaybeLongValue.class) };
        final TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>>(types);
        final TypeSerializer<Tuple2<Long, SomeMaybeLongValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // Sort key is tuple field 0; the order flag 'false' is assumed to mean descending,
        // which is what the order assertion below expects.
        final TypeComparator<Tuple2<Long, SomeMaybeLongValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // Deterministic source: fixed seed, ignores the reuse object and always allocates.
        MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> source = new MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>>() {

            private final Random rnd = new Random(145610843608763871L);

            private int num = -1;

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next(Tuple2<Long, SomeMaybeLongValue> reuse) {
                return next();
            }

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next() {
                if (++num < NUM_RECORDS) {
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SomeMaybeLongValue>(val, new SomeMaybeLongValue((int) val, num % LARGE_REC_INTERVAL == 0));
                } else {
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked") Sorter<Tuple2<Long, SomeMaybeLongValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SomeMaybeLongValue>>(this.memoryManager, this.ioManager, source, this.parentTask, new RuntimeSerializerFactory<Tuple2<Long, SomeMaybeLongValue>>(serializer, (Class<Tuple2<Long, SomeMaybeLongValue>>) (Class<?>) Tuple2.class), comparator, 1.0, 1, 128, 0.7f, true, /*use large record handler*/
        true);
        try {
            // check order
            MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> iterator = sorter.getIterator();
            Tuple2<Long, SomeMaybeLongValue> val = serializer.createInstance();
            long prevKey = Long.MAX_VALUE;
            for (int i = 0; i < NUM_RECORDS; i++) {
                val = iterator.next(val);
                assertTrue("Sort order violated", val.f0 <= prevKey);
                assertEquals("Serialization of test data type incorrect", val.f0.intValue(), val.f1.val());
                // Remember this key so the next record is compared against it; without this
                // the order assertion always compares against Long.MAX_VALUE and cannot fail.
                prevKey = val.f0;
            }
            // The sorter must be exhausted after exactly NUM_RECORDS records.
            assertNull(iterator.next(val));
        } finally {
            // Release the sorter even when an assertion fails (AssertionError bypasses the catch below).
            sorter.close();
        }
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) Test(org.junit.Test)

Example 18 with TupleTypeInfo

use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

the class ExternalSortLargeRecordsITCase method testSortWithShortMediumAndLargeRecords.

/**
 * Sorts {@code NUM_RECORDS} generated {@code (Long, SmallOrMediumOrLargeValue)} tuples on field 0
 * and verifies the sorter's output.
 *
 * <p>The payload size is {@code LARGE_SIZE} for every {@code LARGE_REC_INTERVAL}-th record,
 * {@code MEDIUM_SIZE} for every other {@code MEDIUM_REC_INTERVAL}-th record, and
 * {@code SMALL_SIZE} otherwise. The checks are: keys arrive in non-increasing order, each payload
 * still matches the int-truncated key, and exactly {@code NUM_RECORDS} records are returned.
 */
@Test
public void testSortWithShortMediumAndLargeRecords() {
    try {
        final int NUM_RECORDS = 50000;
        final int LARGE_REC_INTERVAL = 10000;
        final int MEDIUM_REC_INTERVAL = 500;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SmallOrMediumOrLargeValue>(SmallOrMediumOrLargeValue.class) };
        final TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>>(types);
        final TypeSerializer<Tuple2<Long, SmallOrMediumOrLargeValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // Sort key is tuple field 0; the order flag 'false' is assumed to mean descending,
        // which is what the order assertion below expects.
        final TypeComparator<Tuple2<Long, SmallOrMediumOrLargeValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // Deterministic source: fixed seed, ignores the reuse object and always allocates.
        MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> source = new MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>>() {

            private final Random rnd = new Random(1456108743687167086L);

            private int num = -1;

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next(Tuple2<Long, SmallOrMediumOrLargeValue> reuse) {
                return next();
            }

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next() {
                if (++num < NUM_RECORDS) {
                    int size;
                    if (num % LARGE_REC_INTERVAL == 0) {
                        size = SmallOrMediumOrLargeValue.LARGE_SIZE;
                    } else if (num % MEDIUM_REC_INTERVAL == 0) {
                        size = SmallOrMediumOrLargeValue.MEDIUM_SIZE;
                    } else {
                        size = SmallOrMediumOrLargeValue.SMALL_SIZE;
                    }
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SmallOrMediumOrLargeValue>(val, new SmallOrMediumOrLargeValue((int) val, size));
                } else {
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked") Sorter<Tuple2<Long, SmallOrMediumOrLargeValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SmallOrMediumOrLargeValue>>(this.memoryManager, this.ioManager, source, this.parentTask, new RuntimeSerializerFactory<Tuple2<Long, SmallOrMediumOrLargeValue>>(serializer, (Class<Tuple2<Long, SmallOrMediumOrLargeValue>>) (Class<?>) Tuple2.class), comparator, 1.0, 1, 128, 0.7f, true, /*use large record handler*/
        false);
        try {
            // check order
            MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> iterator = sorter.getIterator();
            Tuple2<Long, SmallOrMediumOrLargeValue> val = serializer.createInstance();
            long prevKey = Long.MAX_VALUE;
            for (int i = 0; i < NUM_RECORDS; i++) {
                val = iterator.next(val);
                assertTrue("Sort order violated", val.f0 <= prevKey);
                assertTrue("Serialization of test data type incorrect", val.f0.intValue() == val.f1.val());
                // Remember this key so the next record is compared against it; without this
                // the order assertion always compares against Long.MAX_VALUE and cannot fail.
                prevKey = val.f0;
            }
            // The sorter must be exhausted after exactly NUM_RECORDS records.
            assertNull(iterator.next(val));
        } finally {
            // Release the sorter even when an assertion fails (AssertionError bypasses the catch below).
            sorter.close();
        }
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) Test(org.junit.Test)

Example 19 with TupleTypeInfo

use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

the class ExternalSortLargeRecordsITCase method testSortWithMediumRecordsOnly.

/**
 * Sorts {@code NUM_RECORDS} generated {@code (Long, SmallOrMediumOrLargeValue)} tuples on field 0,
 * where every payload is created with {@code MEDIUM_SIZE}, and verifies the sorter's output.
 *
 * <p>The checks are: keys arrive in non-increasing order, each payload still matches the
 * int-truncated key, and exactly {@code NUM_RECORDS} records are returned.
 */
@Test
public void testSortWithMediumRecordsOnly() {
    try {
        final int NUM_RECORDS = 70;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SmallOrMediumOrLargeValue>(SmallOrMediumOrLargeValue.class) };
        final TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>>(types);
        final TypeSerializer<Tuple2<Long, SmallOrMediumOrLargeValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // Sort key is tuple field 0; the order flag 'false' is assumed to mean descending,
        // which is what the order assertion below expects.
        final TypeComparator<Tuple2<Long, SmallOrMediumOrLargeValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // Deterministic source: fixed seed, ignores the reuse object and always allocates.
        MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> source = new MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>>() {

            private final Random rnd = new Random(62360187263087678L);

            private int num = -1;

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next(Tuple2<Long, SmallOrMediumOrLargeValue> reuse) {
                return next();
            }

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next() {
                if (++num < NUM_RECORDS) {
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SmallOrMediumOrLargeValue>(val, new SmallOrMediumOrLargeValue((int) val, SmallOrMediumOrLargeValue.MEDIUM_SIZE));
                } else {
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked") Sorter<Tuple2<Long, SmallOrMediumOrLargeValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SmallOrMediumOrLargeValue>>(this.memoryManager, this.ioManager, source, this.parentTask, new RuntimeSerializerFactory<Tuple2<Long, SmallOrMediumOrLargeValue>>(serializer, (Class<Tuple2<Long, SmallOrMediumOrLargeValue>>) (Class<?>) Tuple2.class), comparator, 1.0, 1, 128, 0.7f, true, /*use large record handler*/
        true);
        try {
            // check order
            MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> iterator = sorter.getIterator();
            Tuple2<Long, SmallOrMediumOrLargeValue> val = serializer.createInstance();
            long prevKey = Long.MAX_VALUE;
            for (int i = 0; i < NUM_RECORDS; i++) {
                val = iterator.next(val);
                assertTrue("Sort order violated", val.f0 <= prevKey);
                assertTrue("Serialization of test data type incorrect", val.f0.intValue() == val.f1.val());
                // Remember this key so the next record is compared against it; without this
                // the order assertion always compares against Long.MAX_VALUE and cannot fail.
                prevKey = val.f0;
            }
            // The sorter must be exhausted after exactly NUM_RECORDS records.
            assertNull(iterator.next(val));
        } finally {
            // Release the sorter even when an assertion fails (AssertionError bypasses the catch below).
            sorter.close();
        }
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) Test(org.junit.Test)

Example 20 with TupleTypeInfo

use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

the class ExternalSortLargeRecordsITCase method testSortWithLongRecordsOnly.

// --------------------------------------------------------------------------------------------
/**
 * Sorts {@code NUM_RECORDS} generated {@code (Long, SomeMaybeLongValue)} tuples on field 0, each
 * payload built with the single-argument {@code SomeMaybeLongValue} constructor (presumably the
 * "long record" variant; confirm against that class), and verifies the sorter's output.
 *
 * <p>The checks are: keys arrive in non-increasing order, each payload still matches the
 * int-truncated key, and exactly {@code NUM_RECORDS} records are returned.
 */
@Test
public void testSortWithLongRecordsOnly() {
    try {
        final int NUM_RECORDS = 10;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeMaybeLongValue>(SomeMaybeLongValue.class) };
        final TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>>(types);
        final TypeSerializer<Tuple2<Long, SomeMaybeLongValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // Sort key is tuple field 0; the order flag 'false' is assumed to mean descending,
        // which is what the order assertion below expects.
        final TypeComparator<Tuple2<Long, SomeMaybeLongValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // Deterministic source: fixed seed, ignores the reuse object and always allocates.
        MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> source = new MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>>() {

            private final Random rnd = new Random(457821643089756298L);

            private int num = 0;

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next(Tuple2<Long, SomeMaybeLongValue> reuse) {
                return next();
            }

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next() {
                if (num++ < NUM_RECORDS) {
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SomeMaybeLongValue>(val, new SomeMaybeLongValue((int) val));
                } else {
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked") Sorter<Tuple2<Long, SomeMaybeLongValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SomeMaybeLongValue>>(this.memoryManager, this.ioManager, source, this.parentTask, new RuntimeSerializerFactory<Tuple2<Long, SomeMaybeLongValue>>(serializer, (Class<Tuple2<Long, SomeMaybeLongValue>>) (Class<?>) Tuple2.class), comparator, 1.0, 1, 128, 0.7f, true, /* use large record handler */
        false);
        try {
            // check order
            MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> iterator = sorter.getIterator();
            Tuple2<Long, SomeMaybeLongValue> val = serializer.createInstance();
            long prevKey = Long.MAX_VALUE;
            for (int i = 0; i < NUM_RECORDS; i++) {
                val = iterator.next(val);
                assertTrue("Sort order violated", val.f0 <= prevKey);
                assertTrue("Serialization of test data type incorrect", val.f0.intValue() == val.f1.val());
                // Remember this key so the next record is compared against it; without this
                // the order assertion always compares against Long.MAX_VALUE and cannot fail.
                prevKey = val.f0;
            }
            // The sorter must be exhausted after exactly NUM_RECORDS records.
            assertNull(iterator.next(val));
        } finally {
            // Release the sorter even when an assertion fails (AssertionError bypasses the catch below).
            sorter.close();
        }
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) Test(org.junit.Test)

Aggregations

TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)40 Test (org.junit.Test)25 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)24 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)20 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)10 StringValue (org.apache.flink.types.StringValue)9 IOException (java.io.IOException)8 Random (java.util.Random)8 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)8 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)7 IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)7 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)7 DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)7 ArrayList (java.util.ArrayList)6 ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo)6 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)5 RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction)5 MemorySegment (org.apache.flink.core.memory.MemorySegment)5 AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable)5 IntValue (org.apache.flink.types.IntValue)5