Search in sources :

Example 1 with CopyableValueComparator

Use of org.apache.flink.api.java.typeutils.runtime.CopyableValueComparator in the Apache Flink project.

From the class MassiveStringValueSorting, method testStringValueSorting.

/**
 * Sorts a generated file of strings with a {@code UnilateralSortMerger} and compares the result,
 * record by record, against the output of the system {@code sort} command.
 *
 * <p>The reference ordering is produced by running {@code cat | sort} through {@code /bin/bash}
 * with {@code LC_ALL="C"} exported, so the test requires an environment that provides those
 * tools. Any exception raised along the way is printed and reported as a test failure. Both the
 * generated input file and the reference file are deleted when the test ends.
 */
public void testStringValueSorting() {
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file
        sorted = File.createTempFile("sorted_strings", "txt");
        // NOTE(review): LC_ALL="C" presumably forces a locale-independent collation in 'sort'
        // so that its order matches the comparator's order - confirm.
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            // the process terminated normally, so there is nothing left to destroy
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data
        UnilateralSortMerger<StringValue> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager mm = new MemoryManager(1024 * 1024, 1);
            IOManager ioMan = new IOManagerAsync();
            TypeSerializer<StringValue> serializer = new CopyableValueSerializer<StringValue>(StringValue.class);
            TypeComparator<StringValue> comparator = new CopyableValueComparator<StringValue>(true, StringValue.class);
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<StringValue> inputIterator = new StringValueReaderMutableObjectIterator(reader);
            sorter = new UnilateralSortMerger<StringValue>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<StringValue>(serializer, StringValue.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            true);
            MutableObjectIterator<StringValue> sortedData = sorter.getIterator();
            // The input reader is released as soon as the result iterator is available
            // (presumably the input has been fully consumed by then - confirm). Clearing the
            // reference keeps the cleanup below from closing it a second time.
            reader.close();
            reader = null;
            // verify
            verifyReader = new BufferedReader(new FileReader(sorted));
            String nextVerify;
            StringValue nextFromFlinkSort = new StringValue();
            while ((nextVerify = verifyReader.readLine()) != null) {
                nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
                Assert.assertNotNull(nextFromFlinkSort);
                Assert.assertEquals(nextVerify, nextFromFlinkSort.getValue());
            }
            // The loop above only runs for as many records as the reference file holds, so
            // surplus records from the sorter would go unnoticed without this check.
            Assert.assertNull("Sorter produced more records than the reference sort", sortedData.next(new StringValue()));
        } finally {
            // Nested so that a failing close() cannot prevent the remaining resources
            // (in particular the sorter) from being released.
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                try {
                    if (verifyReader != null) {
                        verifyReader.close();
                    }
                } finally {
                    if (sorter != null) {
                        sorter.close();
                    }
                }
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            //noinspection ResultOfMethodCallIgnored
            input.delete();
        }
        if (sorted != null) {
            //noinspection ResultOfMethodCallIgnored
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) CopyableValueSerializer(org.apache.flink.api.java.typeutils.runtime.CopyableValueSerializer) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) CopyableValueComparator(org.apache.flink.api.java.typeutils.runtime.CopyableValueComparator) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringValue(org.apache.flink.types.StringValue) File(java.io.File)

Aggregations

BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileReader (java.io.FileReader)1 IOException (java.io.IOException)1 CopyableValueComparator (org.apache.flink.api.java.typeutils.runtime.CopyableValueComparator)1 CopyableValueSerializer (org.apache.flink.api.java.typeutils.runtime.CopyableValueSerializer)1 RuntimeSerializerFactory (org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory)1 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)1 IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)1 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)1 DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)1 StringValue (org.apache.flink.types.StringValue)1