Search in sources :

Example 1 with RuntimeSerializerFactory

use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.

The class LargeRecordHandler, method addRecord.

// --------------------------------------------------------------------------------------------
/**
 * Adds a record to the spill files for large records and returns the byte offset at
 * which the record was written in the records file.
 *
 * <p>On the first call (while {@code recordsOutFile} is still null) this lazily builds
 * the key serializer/comparator utilities from the first record's extracted keys and
 * opens the two spill channels: one for the full records and one for key tuples. Each
 * key tuple holds the record's flat key fields plus a trailing long with the record's
 * offset in the records file, so the keys can later be sorted independently of the
 * (large) records.
 *
 * @param record the record to spill
 * @return the write offset of the record in the records spill file
 * @throws IOException if writing to either spill file fails
 * @throws IllegalStateException if the handler was closed or has already switched to sorting
 */
@SuppressWarnings("unchecked")
public long addRecord(T record) throws IOException {
    if (recordsOutFile == null) {
        if (closed) {
            throw new IllegalStateException("The large record handler has been closed.");
        }
        if (recordsReader != null) {
            throw new IllegalStateException("The handler has already switched to sorting.");
        }
        LOG.debug("Initializing the large record spilling...");
        // initialize the utilities
        {
            // extract the keys of the first record once to learn the concrete key types
            // (needed below by createSerializer, which inspects the key instances)
            final TypeComparator<?>[] keyComps = comparator.getFlatComparators();
            numKeyFields = keyComps.length;
            Object[] keyHolder = new Object[numKeyFields];
            comparator.extractKeys(record, keyHolder, 0);
            TypeSerializer<?>[] keySers = new TypeSerializer<?>[numKeyFields];
            // tuple has one extra slot for the record's file offset
            TypeSerializer<?>[] tupleSers = new TypeSerializer<?>[numKeyFields + 1];
            int[] keyPos = new int[numKeyFields];
            for (int i = 0; i < numKeyFields; i++) {
                keyPos[i] = i;
                keySers[i] = createSerializer(keyHolder[i], i);
                tupleSers[i] = keySers[i];
            }
            // add the long serializer for the offset
            tupleSers[numKeyFields] = LongSerializer.INSTANCE;
            keySerializer = new TupleSerializer<Tuple>((Class<Tuple>) Tuple.getTupleClass(numKeyFields + 1), tupleSers);
            keyComparator = new TupleComparator<Tuple>(keyPos, keyComps, keySers);
            keySerializerFactory = new RuntimeSerializerFactory<Tuple>(keySerializer, keySerializer.getTupleClass());
            // reusable tuple instance, refilled for every record below
            keyTuple = keySerializer.createInstance();
        }
        // initialize the spilling: split the available memory segments between the
        // key spill file (capped at MAX_SEGMENTS_FOR_KEY_SPILLING) and the records file
        final int totalNumSegments = memory.size();
        final int segmentsForKeys = (totalNumSegments >= 2 * MAX_SEGMENTS_FOR_KEY_SPILLING) ? MAX_SEGMENTS_FOR_KEY_SPILLING : Math.max(MIN_SEGMENTS_FOR_KEY_SPILLING, totalNumSegments - MAX_SEGMENTS_FOR_KEY_SPILLING);
        List<MemorySegment> recordsMemory = new ArrayList<MemorySegment>();
        List<MemorySegment> keysMemory = new ArrayList<MemorySegment>();
        for (int i = 0; i < segmentsForKeys; i++) {
            keysMemory.add(memory.get(i));
        }
        for (int i = segmentsForKeys; i < totalNumSegments; i++) {
            recordsMemory.add(memory.get(i));
        }
        recordsChannel = ioManager.createChannel();
        keysChannel = ioManager.createChannel();
        recordsOutFile = new FileChannelOutputView(ioManager.createBlockChannelWriter(recordsChannel), memManager, recordsMemory, memManager.getPageSize());
        keysOutFile = new FileChannelOutputView(ioManager.createBlockChannelWriter(keysChannel), memManager, keysMemory, memManager.getPageSize());
    }
    // remember where this record starts, so the sorted keys can point back to it
    final long offset = recordsOutFile.getWriteOffset();
    if (offset < 0) {
        throw new RuntimeException("wrong offset");
    }
    // build the key tuple: flat key fields followed by the record offset
    Object[] keyHolder = new Object[numKeyFields];
    comparator.extractKeys(record, keyHolder, 0);
    for (int i = 0; i < numKeyFields; i++) {
        keyTuple.setField(keyHolder[i], i);
    }
    keyTuple.setField(offset, numKeyFields);
    keySerializer.serialize(keyTuple, keysOutFile);
    serializer.serialize(record, recordsOutFile);
    recordCounter++;
    return offset;
}
Also used : FileChannelOutputView(org.apache.flink.runtime.io.disk.FileChannelOutputView) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ArrayList(java.util.ArrayList) TupleComparator(org.apache.flink.api.java.typeutils.runtime.TupleComparator) MemorySegment(org.apache.flink.core.memory.MemorySegment) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 2 with RuntimeSerializerFactory

use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.

The class MassiveStringSorting, method testStringSorting.

/**
 * Verifies that the Flink sorter orders 300,000 strings identically to the Unix
 * {@code sort} command run with {@code LC_ALL=C} (byte-wise ordering).
 *
 * <p>Fix over the previous version: after the verification loop this now also asserts
 * that the sorter produces no extra elements and that exactly 300,000 records were
 * compared, mirroring the completeness checks in {@code testStringTuplesSorting}.
 * Previously a sorter that dropped or duplicated trailing records could pass.
 */
public void testStringSorting() {
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file, produced by the external 'sort' as the reference ordering
        sorted = File.createTempFile("sorted_strings", "txt");
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data with the Flink sorter
        UnilateralSortMerger<String> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager mm = new MemoryManager(1024 * 1024, 1);
            IOManager ioMan = new IOManagerAsync();
            TypeSerializer<String> serializer = StringSerializer.INSTANCE;
            TypeComparator<String> comparator = new StringComparator(true);
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<String> inputIterator = new StringReaderMutableObjectIterator(reader);
            sorter = new UnilateralSortMerger<String>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<String>(serializer, String.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            false);
            MutableObjectIterator<String> sortedData = sorter.getIterator();
            reader.close();
            // verify against the reference ordering, element by element
            verifyReader = new BufferedReader(new FileReader(sorted));
            String next;
            int num = 0;
            while ((next = verifyReader.readLine()) != null) {
                num++;
                String nextFromFlinkSort = sortedData.next("");
                Assert.assertNotNull(nextFromFlinkSort);
                Assert.assertEquals(next, nextFromFlinkSort);
            }
            // completeness: no extra elements, and every record was seen
            Assert.assertNull(sortedData.next(""));
            Assert.assertEquals(300000, num);
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            input.delete();
        }
        if (sorted != null) {
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) StringComparator(org.apache.flink.api.common.typeutils.base.StringComparator) IOException(java.io.IOException) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) File(java.io.File)

Example 3 with RuntimeSerializerFactory

use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.

The class MassiveStringSorting, method testStringTuplesSorting.

/**
 * Verifies that the Flink sorter orders 300,000 {@code Tuple2<String, String[]>}
 * records (keyed on field 0) consistently with the Unix {@code sort} command run
 * with {@code LC_ALL=C}, and that exactly {@code NUM_STRINGS} records come out.
 *
 * <p>Cleanup over the previous version: the commented-out in-memory
 * NormalizedKeySorter verification scaffolding has been removed (dead code).
 */
@SuppressWarnings("unchecked")
public void testStringTuplesSorting() {
    final int NUM_STRINGS = 300000;
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStringTuples(NUM_STRINGS, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file, produced by the external 'sort' as the reference ordering
        sorted = File.createTempFile("sorted_strings", "txt");
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data with the Flink sorter
        UnilateralSortMerger<Tuple2<String, String[]>> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager mm = new MemoryManager(1024 * 1024, 1);
            IOManager ioMan = new IOManagerAsync();
            TupleTypeInfo<Tuple2<String, String[]>> typeInfo = (TupleTypeInfo<Tuple2<String, String[]>>) TypeInfoParser.<Tuple2<String, String[]>>parse("Tuple2<String, String[]>");
            TypeSerializer<Tuple2<String, String[]>> serializer = typeInfo.createSerializer(new ExecutionConfig());
            TypeComparator<Tuple2<String, String[]>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<Tuple2<String, String[]>> inputIterator = new StringTupleReaderMutableObjectIterator(reader);
            sorter = new UnilateralSortMerger<Tuple2<String, String[]>>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<Tuple2<String, String[]>>(serializer, (Class<Tuple2<String, String[]>>) (Class<?>) Tuple2.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            false);
            MutableObjectIterator<Tuple2<String, String[]>> sortedData = sorter.getIterator();
            reader.close();
            // verify against the reference ordering, element by element
            verifyReader = new BufferedReader(new FileReader(sorted));
            MutableObjectIterator<Tuple2<String, String[]>> verifyIterator = new StringTupleReaderMutableObjectIterator(verifyReader);
            Tuple2<String, String[]> next = new Tuple2<String, String[]>("", new String[0]);
            Tuple2<String, String[]> nextFromStratoSort = new Tuple2<String, String[]>("", new String[0]);
            int num = 0;
            while ((next = verifyIterator.next(next)) != null) {
                num++;
                nextFromStratoSort = sortedData.next(nextFromStratoSort);
                Assert.assertNotNull(nextFromStratoSort);
                Assert.assertEquals(next.f0, nextFromStratoSort.f0);
                Assert.assertArrayEquals(next.f1, nextFromStratoSort.f1);
            }
            // completeness: no extra elements, and every record was seen
            Assert.assertNull(sortedData.next(nextFromStratoSort));
            Assert.assertEquals(NUM_STRINGS, num);
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            input.delete();
        }
        if (sorted != null) {
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) File(java.io.File)

Example 4 with RuntimeSerializerFactory

use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.

The class MassiveStringValueSorting, method testStringValueTuplesSorting.

/**
 * Verifies that the Flink sorter orders 300,000 {@code Tuple2<StringValue, StringValue[]>}
 * records (keyed on field 0) consistently with the Unix {@code sort} command run
 * with {@code LC_ALL=C}, and that exactly {@code NUM_STRINGS} records come out.
 *
 * <p>Cleanup over the previous version: the commented-out in-memory
 * NormalizedKeySorter verification scaffolding has been removed (dead code).
 */
@SuppressWarnings("unchecked")
public void testStringValueTuplesSorting() {
    final int NUM_STRINGS = 300000;
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStringTuples(NUM_STRINGS, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file, produced by the external 'sort' as the reference ordering
        sorted = File.createTempFile("sorted_strings", "txt");
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data with the Flink sorter
        UnilateralSortMerger<Tuple2<StringValue, StringValue[]>> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager mm = new MemoryManager(1024 * 1024, 1);
            IOManager ioMan = new IOManagerAsync();
            TupleTypeInfo<Tuple2<StringValue, StringValue[]>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, StringValue[]>>) TypeInfoParser.<Tuple2<StringValue, StringValue[]>>parse("Tuple2<org.apache.flink.types.StringValue, org.apache.flink.types.StringValue[]>");
            TypeSerializer<Tuple2<StringValue, StringValue[]>> serializer = typeInfo.createSerializer(new ExecutionConfig());
            TypeComparator<Tuple2<StringValue, StringValue[]>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> inputIterator = new StringValueTupleReaderMutableObjectIterator(reader);
            sorter = new UnilateralSortMerger<Tuple2<StringValue, StringValue[]>>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<Tuple2<StringValue, StringValue[]>>(serializer, (Class<Tuple2<StringValue, StringValue[]>>) (Class<?>) Tuple2.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            false);
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> sortedData = sorter.getIterator();
            reader.close();
            // verify against the reference ordering, element by element
            verifyReader = new BufferedReader(new FileReader(sorted));
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> verifyIterator = new StringValueTupleReaderMutableObjectIterator(verifyReader);
            Tuple2<StringValue, StringValue[]> nextVerify = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
            Tuple2<StringValue, StringValue[]> nextFromFlinkSort = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
            int num = 0;
            while ((nextVerify = verifyIterator.next(nextVerify)) != null) {
                num++;
                nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
                Assert.assertNotNull(nextFromFlinkSort);
                Assert.assertEquals(nextVerify.f0, nextFromFlinkSort.f0);
                Assert.assertArrayEquals(nextVerify.f1, nextFromFlinkSort.f1);
            }
            // completeness: no extra elements, and every record was seen
            Assert.assertNull(sortedData.next(nextFromFlinkSort));
            Assert.assertEquals(NUM_STRINGS, num);
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            //noinspection ResultOfMethodCallIgnored
            input.delete();
        }
        if (sorted != null) {
            //noinspection ResultOfMethodCallIgnored
            sorted.delete();
        }
    }
}
Also used : RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringValue(org.apache.flink.types.StringValue) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BufferedReader(java.io.BufferedReader) File(java.io.File)

Example 5 with RuntimeSerializerFactory

use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.

The class MassiveStringValueSorting, method testStringValueSorting.

/**
 * Verifies that the Flink sorter orders 300,000 {@link StringValue} records identically
 * to the Unix {@code sort} command run with {@code LC_ALL=C} (byte-wise ordering).
 *
 * <p>Fix over the previous version: after the verification loop this now also asserts
 * that the sorter produces no extra elements and that exactly 300,000 records were
 * compared, mirroring the completeness checks in {@code testStringValueTuplesSorting}.
 * Previously a sorter that dropped or duplicated trailing records could pass.
 */
public void testStringValueSorting() {
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file, produced by the external 'sort' as the reference ordering
        sorted = File.createTempFile("sorted_strings", "txt");
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data with the Flink sorter
        UnilateralSortMerger<StringValue> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager mm = new MemoryManager(1024 * 1024, 1);
            IOManager ioMan = new IOManagerAsync();
            TypeSerializer<StringValue> serializer = new CopyableValueSerializer<StringValue>(StringValue.class);
            TypeComparator<StringValue> comparator = new CopyableValueComparator<StringValue>(true, StringValue.class);
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<StringValue> inputIterator = new StringValueReaderMutableObjectIterator(reader);
            sorter = new UnilateralSortMerger<StringValue>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<StringValue>(serializer, StringValue.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            true);
            MutableObjectIterator<StringValue> sortedData = sorter.getIterator();
            reader.close();
            // verify against the reference ordering, element by element
            verifyReader = new BufferedReader(new FileReader(sorted));
            String nextVerify;
            StringValue nextFromFlinkSort = new StringValue();
            int num = 0;
            while ((nextVerify = verifyReader.readLine()) != null) {
                num++;
                nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
                Assert.assertNotNull(nextFromFlinkSort);
                Assert.assertEquals(nextVerify, nextFromFlinkSort.getValue());
            }
            // completeness: no extra elements, and every record was seen
            Assert.assertNull(sortedData.next(nextFromFlinkSort));
            Assert.assertEquals(300000, num);
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            //noinspection ResultOfMethodCallIgnored
            input.delete();
        }
        if (sorted != null) {
            //noinspection ResultOfMethodCallIgnored
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) CopyableValueSerializer(org.apache.flink.api.java.typeutils.runtime.CopyableValueSerializer) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) CopyableValueComparator(org.apache.flink.api.java.typeutils.runtime.CopyableValueComparator) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringValue(org.apache.flink.types.StringValue) File(java.io.File)

Aggregations

RuntimeSerializerFactory (org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory)5 BufferedReader (java.io.BufferedReader)4 File (java.io.File)4 FileReader (java.io.FileReader)4 IOException (java.io.IOException)4 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)4 IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)4 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)4 DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)4 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)2 StringValue (org.apache.flink.types.StringValue)2 ArrayList (java.util.ArrayList)1 StringComparator (org.apache.flink.api.common.typeutils.base.StringComparator)1 Tuple (org.apache.flink.api.java.tuple.Tuple)1 CopyableValueComparator (org.apache.flink.api.java.typeutils.runtime.CopyableValueComparator)1 CopyableValueSerializer (org.apache.flink.api.java.typeutils.runtime.CopyableValueSerializer)1 TupleComparator (org.apache.flink.api.java.typeutils.runtime.TupleComparator)1 TupleSerializer (org.apache.flink.api.java.typeutils.runtime.TupleSerializer)1