use of org.apache.flink.configuration.Configuration in project flink by apache.
the class DelimitedInputFormatTest method testMultiCharDelimiter.
/**
 * Reads input whose records are separated by the multi-character delimiter {@code "1123"}
 * and checks that the records come back in order, followed by end-of-input.
 */
@Test
public void testMultiCharDelimiter() throws IOException {
    // the sequences "112" and "1112" only partially match the delimiter,
    // so the leftover characters are expected to stay inside the records
    final String content = "www112xx1123yyy11123zzzzz1123";
    final FileInputSplit inputSplit = createTempFile(content);
    final Configuration config = new Configuration();

    format.setDelimiter("1123");
    format.configure(config);
    format.open(inputSplit);

    final String[] expectedRecords = {"www112xx", "yyy1", "zzzzz"};
    for (String expected : expectedRecords) {
        final String record = format.nextRecord(null);
        assertNotNull(record);
        assertEquals(expected, record);
    }

    // after the last record the format must report the end of the input
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class EnumerateNestedFilesTest method setup.
/**
 * Test fixture initialisation: assigns a new, empty {@code Configuration} and a new
 * {@code DummyFileInputFormat} to the corresponding fields.
 */
@Before
public void setup() {
    this.config = new Configuration();
    this.format = new DummyFileInputFormat();
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class FileInputFormatTest method testGetStatisticsMultipleFilesWithCachedVersion.
/**
 * Checks how {@code getStatistics} deals with previously cached statistics for a directory
 * that contains several files:
 * <ul>
 *   <li>without cached statistics, the total input size is the sum of the file sizes,</li>
 *   <li>when the statistics from that first call are passed back in, the very same object is returned,</li>
 *   <li>cached statistics carrying the current modification time are trusted, even if their size is fake,</li>
 *   <li>cached statistics carrying an older modification time are discarded and the size is re-gathered.</li>
 * </ul>
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() {
    try {
        final long SIZE1 = 2077;
        final long SIZE2 = 31909;
        final long SIZE3 = 10;
        final long TOTAL = SIZE1 + SIZE2 + SIZE3;
        final long FAKE_SIZE = 10065;

        String tempDir = TestFileUtils.createTempFileDir(SIZE1, SIZE2, SIZE3);

        // no cached stats available: the sizes of all files must be summed up
        DummyFileInputFormat format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());

        // hand the stats from the first call back in: the identical object must be returned
        format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics newStats = format.getStatistics(stats);
        Assert.assertSame("Statistics object was changed", stats, newStats);

        // insert fake stats with the correct modification time. the call should return the fake stats
        format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics latest = format.getStatistics(fakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());

        // insert fake stats with an outdated modification time. the call should ignore them
        // and report the re-gathered (real) total size
        format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());
    } catch (Exception ex) {
        // fail the test, but keep the original exception as the cause so that its type and
        // stack trace show up in the test report (ex.getMessage() alone may be null)
        throw new AssertionError("Unexpected exception: " + ex, ex);
    }
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class DelimitedInputFormatTest method testReadWithBufferSizeIsMultiple.
/**
 * Tests that the correct number of records is read when the split boundary lies exactly on a
 * record boundary.
 */
@Test
public void testReadWithBufferSizeIsMultiple() throws IOException {
    // four records of seven characters each, every one terminated by a newline
    final String content = "aaaaaaa\nbbbbbbb\nccccccc\nddddddd\n";
    final FileInputSplit wholeFile = createTempFile(content);

    // the first split covers the first half of the file, the second one starts where the first ends
    FileInputSplit firstSplit = new FileInputSplit(0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    // NOTE(review): the fourth argument looks like the split length (compare firstSplit); passing
    // the full file length here lets this split reach past the end of the file - confirm intended
    FileInputSplit secondSplit = new FileInputSplit(1, wholeFile.getPath(), firstSplit.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

    final Configuration config = new Configuration();
    // buffer size is twice the length of the first split
    format.setBufferSize(2 * ((int) firstSplit.getLength()));
    format.configure(config);

    int recordsRead = 0;

    // read split 1
    format.open(firstSplit);
    for (String record = format.nextRecord(null); record != null; record = format.nextRecord(null)) {
        assertEquals(7, record.length());
        recordsRead++;
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // this one must have read one too many, because the next split will skip the trailing remainder
    // which happens to be one full record
    assertEquals(3, recordsRead);

    // read split 2
    format.open(secondSplit);
    for (String record = format.nextRecord(null); record != null; record = format.nextRecord(null)) {
        assertEquals(7, record.length());
        recordsRead++;
    }
    format.close();

    // both splits together must have produced every record exactly once
    assertEquals(4, recordsRead);
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class InnerJoinOperatorBaseTest method testJoinRich.
/**
 * Executes an {@code InnerJoinOperatorBase} that wraps a rich join function on in-memory
 * collections, once with object reuse disabled and once with it enabled. Both runs must emit the
 * expected values, and the function's {@code open} and {@code close} hooks must have been called.
 */
@Test
public void testJoinRich() {
    final AtomicBoolean opened = new AtomicBoolean(false);
    final AtomicBoolean closed = new AtomicBoolean(false);
    final String taskName = "Test rich join function";

    final RichFlatJoinFunction<String, String, Integer> joiner = new RichFlatJoinFunction<String, String, Integer>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            // the flag only flips on the first call; later calls leave it set
            opened.compareAndSet(false, true);
            // the runtime context must expose the task info handed to the operator below
            assertEquals(0, getRuntimeContext().getIndexOfThisSubtask());
            assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
        }

        @Override
        public void close() throws Exception {
            closed.compareAndSet(false, true);
        }

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            // emit the length of both sides of every joined pair
            out.collect(first.length());
            out.collect(second.length());
        }
    };

    final BinaryOperatorInformation<String, String, Integer> operatorInfo =
            new BinaryOperatorInformation<String, String, Integer>(
                    BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO);
    InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>> base =
            new InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>>(
                    joiner, operatorInfo, new int[0], new int[0], taskName);

    final List<String> inputData1 = new ArrayList<>(Arrays.asList("foo", "bar", "foobar"));
    final List<String> inputData2 = new ArrayList<>(Arrays.asList("foobar", "foo"));
    final List<Integer> expected = new ArrayList<>(Arrays.asList(3, 3, 6, 6));

    try {
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        final HashMap<String, Accumulator<?, ?>> accumulatorMap = new HashMap<>();
        final HashMap<String, Future<Path>> cpTasks = new HashMap<>();

        ExecutionConfig executionConfig = new ExecutionConfig();

        // first run: object reuse disabled
        executionConfig.disableObjectReuse();
        List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2, new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()), executionConfig);

        // second run: object reuse enabled
        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2, new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()), executionConfig);

        assertEquals(expected, resultSafe);
        assertEquals(expected, resultRegular);
    } catch (Exception e) {
        // fail the test, but keep the original exception as the cause so that its type and
        // stack trace show up in the test report (e.getMessage() alone may be null)
        throw new AssertionError("Unexpected exception: " + e, e);
    }

    assertTrue(opened.get());
    assertTrue(closed.get());
}
Aggregations