Search in sources :

Example 1 with FileReader

use of java.io.FileReader in project camel by apache.

the class FileDataSet method readSourceFile.

// Implementation methods
//-------------------------------------------------------------------------
/**
 * Reads {@code sourceFile}, splits its content on {@code delimiter} and hands the
 * non-empty pieces to {@code setDefaultBodies}.
 *
 * @throws IOException if the file cannot be opened, or if a read error occurs while
 *         the content is being scanned
 */
private void readSourceFile() throws IOException {
    List<Object> bodies = new LinkedList<>();
    // Closing the Scanner also closes the wrapped reader; the second close of the
    // BufferedReader by try-with-resources is a no-op.
    try (BufferedReader br = new BufferedReader(new FileReader(sourceFile));
            Scanner scanner = new Scanner(br)) {
        scanner.useDelimiter(delimiter);
        while (scanner.hasNext()) {
            String nextPayload = scanner.next();
            // Scanner.next() never returns null, so only empty tokens need filtering.
            if (!nextPayload.isEmpty()) {
                bodies.add(nextPayload);
            }
        }
        // Scanner swallows IOExceptions from the underlying reader and simply reports
        // "no more tokens". Surface such a failure instead of silently installing a
        // truncated list of bodies.
        IOException readFailure = scanner.ioException();
        if (readFailure != null) {
            throw readFailure;
        }
        setDefaultBodies(bodies);
    }
}
Also used : Scanner(java.util.Scanner) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) LinkedList(java.util.LinkedList)

Example 2 with FileReader

use of java.io.FileReader in project flink by apache.

the class MassiveStringSorting method testStringSorting.

/**
 * Sorts a generated file of strings with {@code UnilateralSortMerger} and checks the
 * result, line by line, against the output of the shell's {@code sort} command run
 * on the same file.
 */
public void testStringSorting() {
    File input = null;
    File sorted = null;
    try {
        // unsorted input data
        input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // reference output, produced by the external sort under the "C" locale
        sorted = File.createTempFile("sorted_strings", "txt");
        String shellLine = "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"";
        String[] command = { "/bin/bash", "-c", shellLine };
        Process sortProcess = null;
        boolean sortSucceeded = false;
        try {
            sortProcess = Runtime.getRuntime().exec(command);
            int exitCode = sortProcess.waitFor();
            if (exitCode != 0) {
                throw new Exception("Command failed with return code " + exitCode);
            }
            sortSucceeded = true;
        } finally {
            // only a process that was started but did not finish cleanly is destroyed
            if (!sortSucceeded && sortProcess != null) {
                sortProcess.destroy();
            }
        }
        // run the sorter under test
        UnilateralSortMerger<String> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager memoryManager = new MemoryManager(1024 * 1024, 1);
            IOManager ioManager = new IOManagerAsync();
            TypeSerializer<String> serializer = StringSerializer.INSTANCE;
            TypeComparator<String> comparator = new StringComparator(true);
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<String> inputIterator = new StringReaderMutableObjectIterator(reader);
            DummyInvokable parentTask = new DummyInvokable();
            RuntimeSerializerFactory<String> serializerFactory = new RuntimeSerializerFactory<String>(serializer, String.class);
            sorter = new UnilateralSortMerger<String>(memoryManager, ioManager, inputIterator, parentTask, serializerFactory, comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            false);
            MutableObjectIterator<String> sortedData = sorter.getIterator();
            // the input reader is closed here, before verification starts
            reader.close();
            // compare every reference line with the next record from the sorter
            verifyReader = new BufferedReader(new FileReader(sorted));
            for (String expected = verifyReader.readLine(); expected != null; expected = verifyReader.readLine()) {
                String actual = sortedData.next("");
                Assert.assertNotNull(actual);
                Assert.assertEquals(expected, actual);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        // any failure, including a non-zero exit of the external sort, fails the test
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        // remove the temporary files regardless of the outcome
        if (input != null) {
            input.delete();
        }
        if (sorted != null) {
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) StringComparator(org.apache.flink.api.common.typeutils.base.StringComparator) IOException(java.io.IOException) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) File(java.io.File)

Example 3 with FileReader

use of java.io.FileReader in project flink by apache.

the class MassiveStringSorting method testStringTuplesSorting.

/**
 * Sorts a generated file of {@code Tuple2<String, String[]>} records with
 * {@code UnilateralSortMerger} and checks the result, record by record, against the
 * output of the shell's {@code sort} command run on the same file. Also verifies
 * that exactly {@code NUM_STRINGS} records come back and that the sorter yields
 * nothing beyond them.
 */
@SuppressWarnings("unchecked")
public void testStringTuplesSorting() {
    final int NUM_STRINGS = 300000;
    File input = null;
    File sorted = null;
    try {
        // unsorted input data
        input = generateFileWithStringTuples(NUM_STRINGS, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // reference output, produced by the external sort under the "C" locale
        sorted = File.createTempFile("sorted_strings", "txt");
        String shellLine = "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"";
        String[] command = { "/bin/bash", "-c", shellLine };
        Process sortProcess = null;
        boolean sortSucceeded = false;
        try {
            sortProcess = Runtime.getRuntime().exec(command);
            int exitCode = sortProcess.waitFor();
            if (exitCode != 0) {
                throw new Exception("Command failed with return code " + exitCode);
            }
            sortSucceeded = true;
        } finally {
            // only a process that was started but did not finish cleanly is destroyed
            if (!sortSucceeded && sortProcess != null) {
                sortProcess.destroy();
            }
        }
        // run the sorter under test
        UnilateralSortMerger<Tuple2<String, String[]>> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager memoryManager = new MemoryManager(1024 * 1024, 1);
            IOManager ioManager = new IOManagerAsync();
            TupleTypeInfo<Tuple2<String, String[]>> typeInfo = (TupleTypeInfo<Tuple2<String, String[]>>) TypeInfoParser.<Tuple2<String, String[]>>parse("Tuple2<String, String[]>");
            TypeSerializer<Tuple2<String, String[]>> serializer = typeInfo.createSerializer(new ExecutionConfig());
            TypeComparator<Tuple2<String, String[]>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<Tuple2<String, String[]>> inputIterator = new StringTupleReaderMutableObjectIterator(reader);
            DummyInvokable parentTask = new DummyInvokable();
            RuntimeSerializerFactory<Tuple2<String, String[]>> serializerFactory = new RuntimeSerializerFactory<Tuple2<String, String[]>>(serializer, (Class<Tuple2<String, String[]>>) (Class<?>) Tuple2.class);
            sorter = new UnilateralSortMerger<Tuple2<String, String[]>>(memoryManager, ioManager, inputIterator, parentTask, serializerFactory, comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            false);
            // Debugging aid: swap in the block below to verify that everything is
            // fine when sorting purely in memory.
            //     List<MemorySegment> memory = mm.allocatePages(new DummyInvokable(), mm.computeNumberOfPages(1024*1024*1024));
            //     NormalizedKeySorter<Tuple2<String, String[]>> nks = new NormalizedKeySorter<Tuple2<String,String[]>>(serializer, comparator, memory);
            //     {
            //         Tuple2<String, String[]> wi = new Tuple2<String, String[]>("", new String[0]);
            //         while ((wi = inputIterator.next(wi)) != null) {
            //             Assert.assertTrue(nks.write(wi));
            //         }
            //         new QuickSort().sort(nks);
            //     }
            //     MutableObjectIterator<Tuple2<String, String[]>> sortedData = nks.getIterator();
            MutableObjectIterator<Tuple2<String, String[]>> sortedData = sorter.getIterator();
            // the input reader is closed here, before verification starts
            reader.close();
            // walk the reference file and the sorter output in lock-step
            verifyReader = new BufferedReader(new FileReader(sorted));
            MutableObjectIterator<Tuple2<String, String[]>> verifyIterator = new StringTupleReaderMutableObjectIterator(verifyReader);
            Tuple2<String, String[]> expected = new Tuple2<String, String[]>("", new String[0]);
            Tuple2<String, String[]> actual = new Tuple2<String, String[]>("", new String[0]);
            int verified = 0;
            while ((expected = verifyIterator.next(expected)) != null) {
                verified++;
                actual = sortedData.next(actual);
                Assert.assertNotNull(actual);
                Assert.assertEquals(expected.f0, actual.f0);
                Assert.assertArrayEquals(expected.f1, actual.f1);
            }
            // the sorter must be exhausted, and the record count must match
            Assert.assertNull(sortedData.next(actual));
            Assert.assertEquals(NUM_STRINGS, verified);
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        // any failure, including a non-zero exit of the external sort, fails the test
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        // remove the temporary files regardless of the outcome
        if (input != null) {
            input.delete();
        }
        if (sorted != null) {
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) File(java.io.File)

Example 4 with FileReader

use of java.io.FileReader in project flink by apache.

the class MassiveStringValueSorting method testStringValueTuplesSorting.

/**
 * Sorts a generated file of {@code Tuple2<StringValue, StringValue[]>} records with
 * {@code UnilateralSortMerger} and checks the result, record by record, against the
 * output of the shell's {@code sort} command run on the same file. Also verifies
 * that exactly {@code NUM_STRINGS} records come back and that the sorter yields
 * nothing beyond them.
 */
@SuppressWarnings("unchecked")
public void testStringValueTuplesSorting() {
    final int NUM_STRINGS = 300000;
    File input = null;
    File sorted = null;
    try {
        // unsorted input data
        input = generateFileWithStringTuples(NUM_STRINGS, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // reference output, produced by the external sort under the "C" locale
        sorted = File.createTempFile("sorted_strings", "txt");
        String shellLine = "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"";
        String[] command = { "/bin/bash", "-c", shellLine };
        Process sortProcess = null;
        boolean sortSucceeded = false;
        try {
            sortProcess = Runtime.getRuntime().exec(command);
            int exitCode = sortProcess.waitFor();
            if (exitCode != 0) {
                throw new Exception("Command failed with return code " + exitCode);
            }
            sortSucceeded = true;
        } finally {
            // only a process that was started but did not finish cleanly is destroyed
            if (!sortSucceeded && sortProcess != null) {
                sortProcess.destroy();
            }
        }
        // run the sorter under test
        UnilateralSortMerger<Tuple2<StringValue, StringValue[]>> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        try {
            MemoryManager memoryManager = new MemoryManager(1024 * 1024, 1);
            IOManager ioManager = new IOManagerAsync();
            TupleTypeInfo<Tuple2<StringValue, StringValue[]>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, StringValue[]>>) TypeInfoParser.<Tuple2<StringValue, StringValue[]>>parse("Tuple2<org.apache.flink.types.StringValue, org.apache.flink.types.StringValue[]>");
            TypeSerializer<Tuple2<StringValue, StringValue[]>> serializer = typeInfo.createSerializer(new ExecutionConfig());
            TypeComparator<Tuple2<StringValue, StringValue[]>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> inputIterator = new StringValueTupleReaderMutableObjectIterator(reader);
            DummyInvokable parentTask = new DummyInvokable();
            RuntimeSerializerFactory<Tuple2<StringValue, StringValue[]>> serializerFactory = new RuntimeSerializerFactory<Tuple2<StringValue, StringValue[]>>(serializer, (Class<Tuple2<StringValue, StringValue[]>>) (Class<?>) Tuple2.class);
            sorter = new UnilateralSortMerger<Tuple2<StringValue, StringValue[]>>(memoryManager, ioManager, inputIterator, parentTask, serializerFactory, comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            false);
            // Debugging aid: swap in the block below to verify that everything is
            // fine when sorting purely in memory. NOTE(review): the snippet is
            // written against Tuple2<String, String[]> and would need adapting to
            // the StringValue types used in this test.
            //     List<MemorySegment> memory = mm.allocatePages(new DummyInvokable(), mm.computeNumberOfPages(1024*1024*1024));
            //     NormalizedKeySorter<Tuple2<String, String[]>> nks = new NormalizedKeySorter<Tuple2<String,String[]>>(serializer, comparator, memory);
            //     {
            //         Tuple2<String, String[]> wi = new Tuple2<String, String[]>("", new String[0]);
            //         while ((wi = inputIterator.next(wi)) != null) {
            //             Assert.assertTrue(nks.write(wi));
            //         }
            //         new QuickSort().sort(nks);
            //     }
            //     MutableObjectIterator<Tuple2<String, String[]>> sortedData = nks.getIterator();
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> sortedData = sorter.getIterator();
            // the input reader is closed here, before verification starts
            reader.close();
            // walk the reference file and the sorter output in lock-step
            verifyReader = new BufferedReader(new FileReader(sorted));
            MutableObjectIterator<Tuple2<StringValue, StringValue[]>> verifyIterator = new StringValueTupleReaderMutableObjectIterator(verifyReader);
            Tuple2<StringValue, StringValue[]> expected = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
            Tuple2<StringValue, StringValue[]> actual = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
            int verified = 0;
            while ((expected = verifyIterator.next(expected)) != null) {
                verified++;
                actual = sortedData.next(actual);
                Assert.assertNotNull(actual);
                Assert.assertEquals(expected.f0, actual.f0);
                Assert.assertArrayEquals(expected.f1, actual.f1);
            }
            // the sorter must be exhausted, and the record count must match
            Assert.assertNull(sortedData.next(actual));
            Assert.assertEquals(NUM_STRINGS, verified);
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
        }
    } catch (Exception e) {
        // any failure, including a non-zero exit of the external sort, fails the test
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        // remove the temporary files regardless of the outcome
        if (input != null) {
            //noinspection ResultOfMethodCallIgnored
            input.delete();
        }
        if (sorted != null) {
            //noinspection ResultOfMethodCallIgnored
            sorted.delete();
        }
    }
}
Also used : RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringValue(org.apache.flink.types.StringValue) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BufferedReader(java.io.BufferedReader) File(java.io.File)

Example 5 with FileReader

use of java.io.FileReader in project flink by apache.

the class DataSinkTaskTest method testDataSinkTask.

/**
 * Runs a {@code DataSinkTask} that writes to {@code tempTestPath} and checks the
 * written file: every line is parsed as {@code <key>_<value>}, and the test asserts
 * that the file contains {@code keyCnt} distinct keys with {@code valCnt} distinct
 * values each. Any exception fails the test.
 */
@Test
public void testDataSinkTask() {
    FileReader fr = null;
    BufferedReader br = null;
    try {
        int keyCnt = 100;
        int valCnt = 20;
        super.initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
        super.addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
        DataSinkTask<Record> testTask = new DataSinkTask<>();
        super.registerFileOutputTask(testTask, MockOutputFormat.class, new File(tempTestPath).toURI().toString());
        testTask.invoke();
        File tempTestFile = new File(this.tempTestPath);
        Assert.assertTrue("Temp output file does not exist", tempTestFile.exists());
        fr = new FileReader(tempTestFile);
        br = new BufferedReader(fr);
        HashMap<Integer, HashSet<Integer>> keyValueCountMap = new HashMap<>(keyCnt);
        // Read until readLine() signals end of stream. ready() only reports whether a
        // read would block, so it is not a reliable end-of-file test.
        String line;
        while ((line = br.readLine()) != null) {
            int separator = line.indexOf("_");
            Integer key = Integer.parseInt(line.substring(0, separator));
            Integer val = Integer.parseInt(line.substring(separator + 1));
            HashSet<Integer> values = keyValueCountMap.get(key);
            if (values == null) {
                values = new HashSet<Integer>();
                keyValueCountMap.put(key, values);
            }
            values.add(val);
        }
        Assert.assertTrue("Invalid key count in out file. Expected: " + keyCnt + " Actual: " + keyValueCountMap.keySet().size(), keyValueCountMap.keySet().size() == keyCnt);
        for (Integer key : keyValueCountMap.keySet()) {
            Assert.assertTrue("Invalid value count for key: " + key + ". Expected: " + valCnt + " Actual: " + keyValueCountMap.get(key).size(), keyValueCountMap.get(key).size() == valCnt);
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        // Cleanup is best-effort: a failure to close must not mask the test result.
        if (br != null) {
            try {
                br.close();
            } catch (Throwable ignored) {
                // deliberately ignored, see above
            }
        }
        if (fr != null) {
            try {
                fr.close();
            } catch (Throwable ignored) {
                // deliberately ignored, see above
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) Record(org.apache.flink.types.Record) UniformRecordGenerator(org.apache.flink.runtime.operators.testutils.UniformRecordGenerator) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

FileReader (java.io.FileReader)1602 BufferedReader (java.io.BufferedReader)1114 IOException (java.io.IOException)762 File (java.io.File)701 FileNotFoundException (java.io.FileNotFoundException)264 ArrayList (java.util.ArrayList)250 Test (org.junit.Test)154 FileWriter (java.io.FileWriter)121 HashMap (java.util.HashMap)103 Reader (java.io.Reader)86 BufferedWriter (java.io.BufferedWriter)78 Properties (java.util.Properties)57 InputStreamReader (java.io.InputStreamReader)54 Map (java.util.Map)54 List (java.util.List)51 Matcher (java.util.regex.Matcher)49 LineNumberReader (java.io.LineNumberReader)46 HashSet (java.util.HashSet)43 FileInputStream (java.io.FileInputStream)41 PrintWriter (java.io.PrintWriter)40