
Example 6 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache, from the class MassiveStringValueSorting, method testStringValueTuplesSorting.

@SuppressWarnings("unchecked")
public void testStringValueTuplesSorting() {
    final int NUM_STRINGS = 300000;
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStringTuples(NUM_STRINGS, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file
        sorted = File.createTempFile("sorted_strings", "txt");
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data
        UnilateralSortMerger<Tuple2<StringValue, StringValue[]>> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager mm = new MemoryManager(1024 * 1024, 1);
            IOManager ioMan = new IOManagerAsync();
            TupleTypeInfo<Tuple2<StringValue, StringValue[]>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, StringValue[]>>) TypeInfoParser.<Tuple2<StringValue, StringValue[]>>parse("Tuple2<org.apache.flink.types.StringValue, org.apache.flink.types.StringValue[]>");
            TypeSerializer<Tuple2<StringValue, StringValue[]>> serializer = typeInfo.createSerializer(new ExecutionConfig());
            TypeComparator<Tuple2<StringValue, StringValue[]>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> inputIterator = new StringValueTupleReaderMutableObjectIterator(reader);
            sorter = new UnilateralSortMerger<Tuple2<StringValue, StringValue[]>>(
                    mm, ioMan, inputIterator, new DummyInvokable(),
                    new RuntimeSerializerFactory<Tuple2<StringValue, StringValue[]>>(
                            serializer, (Class<Tuple2<StringValue, StringValue[]>>) (Class<?>) Tuple2.class),
                    comparator, 1.0, 4, 0.8f,
                    true, /* use large record handler */
                    false);
            // use this part to verify that all is good when sorting in memory
            //				List<MemorySegment> memory = mm.allocatePages(new DummyInvokable(), mm.computeNumberOfPages(1024*1024*1024));
            //				NormalizedKeySorter<Tuple2<String, String[]>> nks = new NormalizedKeySorter<Tuple2<String,String[]>>(serializer, comparator, memory);
            //
            //				{
            //					Tuple2<String, String[]> wi = new Tuple2<String, String[]>("", new String[0]);
            //					while ((wi = inputIterator.next(wi)) != null) {
            //						Assert.assertTrue(nks.write(wi));
            //					}
            //					
            //					new QuickSort().sort(nks);
            //				}
            //				
            //				MutableObjectIterator<Tuple2<String, String[]>> sortedData = nks.getIterator();
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> sortedData = sorter.getIterator();
            reader.close();
            // verify
            verifyReader = new BufferedReader(new FileReader(sorted));
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> verifyIterator = new StringValueTupleReaderMutableObjectIterator(verifyReader);
            Tuple2<StringValue, StringValue[]> nextVerify = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
            Tuple2<StringValue, StringValue[]> nextFromFlinkSort = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
            int num = 0;
            while ((nextVerify = verifyIterator.next(nextVerify)) != null) {
                num++;
                nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
                Assert.assertNotNull(nextFromFlinkSort);
                Assert.assertEquals(nextVerify.f0, nextFromFlinkSort.f0);
                Assert.assertArrayEquals(nextVerify.f1, nextFromFlinkSort.f1);
            }
            Assert.assertNull(sortedData.next(nextFromFlinkSort));
            Assert.assertEquals(NUM_STRINGS, num);
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            //noinspection ResultOfMethodCallIgnored
            input.delete();
        }
        if (sorted != null) {
            //noinspection ResultOfMethodCallIgnored
            sorted.delete();
        }
    }
}
Also used : RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringValue(org.apache.flink.types.StringValue) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BufferedReader(java.io.BufferedReader) File(java.io.File)
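
A note on the type construction above: TypeInfoParser builds the TupleTypeInfo from a type string, which is why the unchecked cast is needed. The same type information can also be assembled explicitly; the snippet below is a minimal sketch of that alternative (it is not part of the original test, and it assumes ObjectArrayTypeInfo is the appropriate wrapper for the StringValue[] field).

// additionally requires org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo
// and org.apache.flink.api.java.typeutils.ValueTypeInfo
TupleTypeInfo<Tuple2<StringValue, StringValue[]>> typeInfo =
        new TupleTypeInfo<>(
                // field 0: a single StringValue
                ValueTypeInfo.STRING_VALUE_TYPE_INFO,
                // field 1: StringValue[] as an object array over the same value type
                ObjectArrayTypeInfo.getInfoFor(StringValue[].class, ValueTypeInfo.STRING_VALUE_TYPE_INFO));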

Example 7 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache, from the class ValueCollectionDataSets, method getGroupSortedNestedTupleDataSet2.

public static DataSet<Tuple3<Tuple2<IntValue, IntValue>, StringValue, IntValue>> getGroupSortedNestedTupleDataSet2(ExecutionEnvironment env) {
    List<Tuple3<Tuple2<IntValue, IntValue>, StringValue, IntValue>> data = new ArrayList<>();
    data.add(new Tuple3<>(new Tuple2<IntValue, IntValue>(new IntValue(1), new IntValue(3)), new StringValue("a"), new IntValue(2)));
    data.add(new Tuple3<>(new Tuple2<IntValue, IntValue>(new IntValue(1), new IntValue(2)), new StringValue("a"), new IntValue(1)));
    data.add(new Tuple3<>(new Tuple2<IntValue, IntValue>(new IntValue(2), new IntValue(1)), new StringValue("a"), new IntValue(3)));
    data.add(new Tuple3<>(new Tuple2<IntValue, IntValue>(new IntValue(2), new IntValue(2)), new StringValue("b"), new IntValue(4)));
    data.add(new Tuple3<>(new Tuple2<IntValue, IntValue>(new IntValue(3), new IntValue(3)), new StringValue("c"), new IntValue(5)));
    data.add(new Tuple3<>(new Tuple2<IntValue, IntValue>(new IntValue(3), new IntValue(6)), new StringValue("c"), new IntValue(6)));
    data.add(new Tuple3<>(new Tuple2<IntValue, IntValue>(new IntValue(4), new IntValue(9)), new StringValue("c"), new IntValue(7)));
    TupleTypeInfo<Tuple3<Tuple2<IntValue, IntValue>, StringValue, IntValue>> type = new TupleTypeInfo<>(new TupleTypeInfo<Tuple2<IntValue, IntValue>>(ValueTypeInfo.INT_VALUE_TYPE_INFO, ValueTypeInfo.INT_VALUE_TYPE_INFO), ValueTypeInfo.STRING_VALUE_TYPE_INFO, ValueTypeInfo.INT_VALUE_TYPE_INFO);
    return env.fromCollection(data, type);
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) ArrayList(java.util.ArrayList) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo)
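
For context, a data set produced by this method would typically be grouped on the nested key and sorted within each group. The lines below are a hypothetical usage sketch; the grouping fields and the call chain are illustrative and not taken from the tests that call getGroupSortedNestedTupleDataSet2.

// additionally requires org.apache.flink.api.common.operators.Order
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Tuple2<IntValue, IntValue>, StringValue, IntValue>> ds =
        ValueCollectionDataSets.getGroupSortedNestedTupleDataSet2(env);
// group on the first field of the nested tuple, sort each group on its second field,
// and keep one record per group (print() may throw, so call it from a throwing method)
ds.groupBy("f0.f0")
        .sortGroup("f0.f1", Order.ASCENDING)
        .first(1)
        .print();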

Example 8 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache, from the class ValueCollectionDataSets, method getSmallNestedTupleDataSet.

public static DataSet<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> getSmallNestedTupleDataSet(ExecutionEnvironment env) {
    List<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> data = new ArrayList<>();
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(1), new IntValue(1)), new StringValue("one")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(2), new IntValue(2)), new StringValue("two")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(3), new IntValue(3)), new StringValue("three")));
    TupleTypeInfo<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> type = new TupleTypeInfo<>(new TupleTypeInfo<Tuple2<IntValue, IntValue>>(ValueTypeInfo.INT_VALUE_TYPE_INFO, ValueTypeInfo.INT_VALUE_TYPE_INFO), ValueTypeInfo.STRING_VALUE_TYPE_INFO);
    return env.fromCollection(data, type);
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArrayList(java.util.ArrayList) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo)
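
Because the explicit TupleTypeInfo mirrors the nesting of the tuples, the usual composite-type accessors can be used to inspect it. The following is a small sketch, assuming the standard TypeInformation/CompositeType methods shown here are available in this Flink line; it is not part of the original method.

TupleTypeInfo<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> type =
        new TupleTypeInfo<>(
                new TupleTypeInfo<Tuple2<IntValue, IntValue>>(
                        ValueTypeInfo.INT_VALUE_TYPE_INFO, ValueTypeInfo.INT_VALUE_TYPE_INFO),
                ValueTypeInfo.STRING_VALUE_TYPE_INFO);
System.out.println(type.getArity());        // 2: the nested Tuple2 plus the StringValue
System.out.println(type.getTotalFields());  // 3: two IntValue leaves plus the StringValue
System.out.println(type.getTypeAt(0));      // the nested TupleTypeInfo for Tuple2<IntValue, IntValue>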

Example 9 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache, from the class ValueCollectionDataSets, method getGroupSortedNestedTupleDataSet.

public static DataSet<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> getGroupSortedNestedTupleDataSet(ExecutionEnvironment env) {
    List<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> data = new ArrayList<>();
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(1), new IntValue(3)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(1), new IntValue(2)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(2), new IntValue(1)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(2), new IntValue(2)), new StringValue("b")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(3), new IntValue(3)), new StringValue("c")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(3), new IntValue(6)), new StringValue("c")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(4), new IntValue(9)), new StringValue("c")));
    TupleTypeInfo<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> type = new TupleTypeInfo<>(new TupleTypeInfo<Tuple2<IntValue, IntValue>>(ValueTypeInfo.INT_VALUE_TYPE_INFO, ValueTypeInfo.INT_VALUE_TYPE_INFO), ValueTypeInfo.STRING_VALUE_TYPE_INFO);
    return env.fromCollection(data, type);
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArrayList(java.util.ArrayList) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo)

Example 10 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache, from the class TableEnvironmentITCase, method testAsFromAndToTuple.

@Test
public void testAsFromAndToTuple() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());
    Table table = tableEnv.fromDataSet(CollectionDataSets.get3TupleDataSet(env), "a, b, c").select("a, b, c");
    TypeInformation<?> ti = new TupleTypeInfo<Tuple3<Integer, Long, String>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    DataSet<?> ds = tableEnv.toDataSet(table, ti);
    List<?> results = ds.collect();
    String expected = "(1,1,Hi)\n" + "(2,2,Hello)\n" + "(3,2,Hello world)\n" + "(4,3,Hello world, how are you?)\n" + "(5,3,I am fine.)\n" + "(6,3,Luke Skywalker)\n" + "(7,4,Comment#1)\n" + "(8,4,Comment#2)\n" + "(9,4,Comment#3)\n" + "(10,4,Comment#4)\n" + "(11,5,Comment#5)\n" + "(12,5,Comment#6)\n" + "(13,5,Comment#7)\n" + "(14,5,Comment#8)\n" + "(15,5,Comment#9)\n" + "(16,6,Comment#10)\n" + "(17,6,Comment#11)\n" + "(18,6,Comment#12)\n" + "(19,6,Comment#13)\n" + "(20,6,Comment#14)\n" + "(21,6,Comment#15)\n";
    compareResultAsText(results, expected);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Table(org.apache.flink.table.api.Table) BatchTableEnvironment(org.apache.flink.table.api.java.BatchTableEnvironment) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Test(org.junit.Test)
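
The TupleTypeInfo handed to toDataSet has to match the selected fields positionally (Integer, Long, String above). As a purely illustrative variant, not part of the original test, one could project two of the columns and convert back with a matching Tuple2 type:

Table projected = table.select("a, c");
TypeInformation<Tuple2<Integer, String>> ti2 =
        new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
DataSet<Tuple2<Integer, String>> ds2 = tableEnv.toDataSet(projected, ti2);
List<Tuple2<Integer, String>> projectedRows = ds2.collect();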

Aggregations

TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 40 usages
Test (org.junit.Test): 25 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 24 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 20 usages
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 10 usages
StringValue (org.apache.flink.types.StringValue): 9 usages
IOException (java.io.IOException): 8 usages
Random (java.util.Random): 8 usages
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 8 usages
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager): 7 usages
IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync): 7 usages
MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 7 usages
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable): 7 usages
ArrayList (java.util.ArrayList): 6 usages
ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo): 6 usages
GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction): 5 usages
RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction): 5 usages
MemorySegment (org.apache.flink.core.memory.MemorySegment): 5 usages
AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable): 5 usages
IntValue (org.apache.flink.types.IntValue): 5 usages