use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
the class ContinuousFileProcessingMigrationTest method testReaderSnapshotRestore.
// END OF PREPARATIONS
// TESTS
/**
 * Restores a {@link ContinuousFileReaderOperator} from the legacy checkpoint stored in the
 * {@code reader-migration-test-flink1.1-snapshot} test resource and verifies that the four
 * splits listed below appear in the operator's output after the reader has been closed.
 */
@Test
public void testReaderSnapshotRestore() throws Exception {
    /*
    Kept for reference: this appears to be the code that wrote the snapshot file
    restored below (note the target path passed to snaphotToFile).

    FileInputSplit split1 =
    new FileInputSplit(3, new Path("test/test1"), 0, 100, null);
    FileInputSplit split2 =
    new FileInputSplit(2, new Path("test/test2"), 101, 200, null);
    FileInputSplit split3 =
    new FileInputSplit(1, new Path("test/test2"), 0, 100, null);
    FileInputSplit split4 =
    new FileInputSplit(0, new Path("test/test3"), 0, 100, null);
    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
    TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
    ContinuousFileReaderOperator<FileInputSplit, ?> initReader = new ContinuousFileReaderOperator<>(format);
    initReader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<FileInputSplit, FileInputSplit> initTestInstance =
    new OneInputStreamOperatorTestHarness<>(initReader);
    initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initTestInstance.open();
    // create some state in the reader
    initTestInstance.processElement(new StreamRecord<>(split1));
    initTestInstance.processElement(new StreamRecord<>(split2));
    initTestInstance.processElement(new StreamRecord<>(split3));
    initTestInstance.processElement(new StreamRecord<>(split4));
    // take a snapshot of the operator's state. This will be used
    // to initialize another reader and compare the results of the
    // two operators.
    final StreamTaskState snapshot;
    synchronized (initTestInstance.getCheckpointLock()) {
    snapshot = initTestInstance.snapshot(0L, 0L);
    }
    initTestInstance.snaphotToFile(snapshot, "src/test/resources/reader-migration-test-flink1.1-snapshot");
    */

    // The splits expected in the output after restoring from the snapshot.
    final TimestampedFileInputSplit[] expectedSplits = {
        new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null),
        new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null),
        new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null),
        new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null)
    };

    final OneShotLatch readGate = new OneShotLatch();
    final BlockingFileInputFormat inputFormat = new BlockingFileInputFormat(readGate, new Path(hdfsURI));
    final TypeInformation<FileInputSplit> outputType = TypeExtractor.getInputFormatTypes(inputFormat);

    final ContinuousFileReaderOperator<FileInputSplit> reader = new ContinuousFileReaderOperator<>(inputFormat);
    reader.setOutputType(outputType, new ExecutionConfig());

    final OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> harness =
        new OneInputStreamOperatorTestHarness<>(reader);
    harness.setTimeCharacteristic(TimeCharacteristic.EventTime);

    // Order matters here: set up, load the legacy state, and only then open the operator.
    harness.setup();
    harness.initializeStateFromLegacyCheckpoint(getResourceFilename("reader-migration-test-flink1.1-snapshot"));
    harness.open();

    // Trigger the latch handed to the input format (presumably this unblocks reading),
    // then close the operator while holding the checkpoint lock.
    readGate.trigger();
    synchronized (harness.getCheckpointLock()) {
        harness.close();
    }

    // Every expected split, converted to a plain FileInputSplit, must be in the output.
    for (TimestampedFileInputSplit expected : expectedSplits) {
        final FileInputSplit plainSplit = createSplitFromTimestampedSplit(expected);
        Assert.assertTrue(harness.getOutput().contains(new StreamRecord<>(plainSplit)));
    }
}
use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
the class ContinuousFileProcessingTest method testFileReadingOperatorWithEventTime.
/**
 * Reads all test files through a {@link ContinuousFileReaderOperator} configured for event time
 * and verifies that the output consists of every line of every file followed by a single
 * {@code Long.MAX_VALUE} watermark as the last element.
 */
@Test
public void testFileReadingOperatorWithEventTime() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    Map<String, Long> modTimes = new HashMap<>();
    Map<Integer, String> expectedFileContents = new HashMap<>();

    // create the input files, remembering their modification time and expected content
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        modTimes.put(file.f0.getName(), hdfs.getFileStatus(file.f0).getModificationTime());
        filesCreated.add(file.f0);
        expectedFileContents.put(i, file.f1);
    }

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

    ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
    reader.setOutputType(typeInfo, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> tester =
        new OneInputStreamOperatorTestHarness<>(reader);
    tester.setTimeCharacteristic(TimeCharacteristic.EventTime);
    tester.open();

    // create the necessary splits for the test
    FileInputSplit[] splits = format.createInputSplits(
        reader.getRuntimeContext().getNumberOfParallelSubtasks());

    // and feed them to the operator, stamped with the modification time of their file
    for (FileInputSplit split : splits) {
        tester.processElement(new StreamRecord<>(new TimestampedFileInputSplit(
            modTimes.get(split.getPath().getName()),
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames())));
    }

    // then close the reader gracefully (and wait to finish reading)
    synchronized (tester.getCheckpointLock()) {
        tester.close();
    }

    // The lines received must be the elements in the files, +1 for the Long.MAX_VALUE
    // watermark: we are in event time, where the reader emits no watermarks of its own,
    // so the only watermark is the last one, marking the end of the input stream.
    Assert.assertEquals(NO_OF_FILES * LINES_PER_FILE + 1, tester.getOutput().size());

    // group the received lines by file; the first character of a line is the file index
    Map<Integer, List<String>> actualFileContents = new HashMap<>();
    Object lastElement = null;
    for (Object line : tester.getOutput()) {
        lastElement = line;
        if (line instanceof StreamRecord) {
            @SuppressWarnings("unchecked")
            StreamRecord<String> element = (StreamRecord<String>) line;

            int fileIdx = Character.getNumericValue(element.getValue().charAt(0));
            List<String> content = actualFileContents.get(fileIdx);
            if (content == null) {
                content = new ArrayList<>();
                actualFileContents.put(fileIdx, content);
            }
            content.add(element.getValue() + "\n");
        }
    }

    // check if the last element is the LongMax watermark
    Assert.assertTrue(lastElement instanceof Watermark);
    Assert.assertEquals(Long.MAX_VALUE, ((Watermark) lastElement).getTimestamp());

    Assert.assertEquals(expectedFileContents.size(), actualFileContents.size());
    for (Integer fileIdx : expectedFileContents.keySet()) {
        Assert.assertTrue("file" + fileIdx + " not found", actualFileContents.containsKey(fileIdx));

        // lines may have been collected out of order; restore the line-number order
        List<String> cntnt = actualFileContents.get(fileIdx);
        Collections.sort(cntnt, new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // Integer.compare instead of subtraction: subtraction can overflow
                return Integer.compare(getLineNo(o1), getLineNo(o2));
            }
        });

        StringBuilder cntntStr = new StringBuilder();
        for (String line : cntnt) {
            cntntStr.append(line);
        }
        Assert.assertEquals(expectedFileContents.get(fileIdx), cntntStr.toString());
    }

    // remove the files created for this test
    for (org.apache.hadoop.fs.Path file : filesCreated) {
        hdfs.delete(file, false);
    }
}
use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
the class BoltWrapperTest method testWrapper.
/**
 * Sends one mocked {@link StreamRecord} through a {@link BoltWrapper} around a mocked bolt and
 * verifies that the bolt is executed with the corresponding {@link StormTuple}.
 *
 * @param numberOfAttributes {@code -1} to send the raw string {@code "test"}; otherwise the
 *        arity (0 to 25) of the Flink tuple to send
 * @throws Exception if creating the tuple or setting up, opening or running the wrapper fails
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private void testWrapper(final int numberOfAttributes) throws Exception {
    assert ((-1 <= numberOfAttributes) && (numberOfAttributes <= 25));

    // -1 selects the "raw value" case; anything else is the arity of a Flink tuple
    final boolean rawInput = numberOfAttributes == -1;

    Tuple flinkTuple = null;
    String rawTuple = null;
    final String[] schema;
    if (rawInput) {
        rawTuple = "test";
        schema = new String[1];
    } else {
        flinkTuple = Tuple.getTupleClass(numberOfAttributes).newInstance();
        schema = new String[numberOfAttributes];
    }
    for (int i = 0; i < schema.length; ++i) {
        schema[i] = "a" + i;
    }

    // the record handed to the wrapper carries either the raw string or the tuple
    final StreamRecord record = mock(StreamRecord.class);
    if (rawInput) {
        when(record.getValue()).thenReturn(rawTuple);
    } else {
        when(record.getValue()).thenReturn(flinkTuple);
    }

    final IRichBolt bolt = mock(IRichBolt.class);

    // make every no-arg construction of SetupOutputFieldsDeclarer return this
    // pre-populated declarer
    final SetupOutputFieldsDeclarer declarer = new SetupOutputFieldsDeclarer();
    declarer.declare(new Fields(schema));
    PowerMockito.whenNew(SetupOutputFieldsDeclarer.class).withNoArguments().thenReturn(declarer);

    final BoltWrapper wrapper = new BoltWrapper(bolt, (Fields) null);
    wrapper.setup(createMockStreamTask(), new StreamConfig(new Configuration()), mock(Output.class));
    wrapper.open();

    wrapper.processElement(record);

    if (rawInput) {
        verify(bolt).execute(eq(new StormTuple<String>(rawTuple, null, -1, null, null, MessageId.makeUnanchored())));
    } else {
        verify(bolt).execute(eq(new StormTuple<Tuple>(flinkTuple, null, -1, null, null, MessageId.makeUnanchored())));
    }
}
use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
the class ContinuousFileProcessingTest method testFileReadingOperatorWithIngestionTime.
/**
 * Runs a {@link ContinuousFileReaderOperator} under ingestion time and verifies three things:
 * that watermarks are emitted as the processing time is advanced, that each split's records
 * carry the processing time set just before the split was sent, and that closing the operator
 * emits a final {@code Long.MAX_VALUE} watermark.
 */
@Test
public void testFileReadingOperatorWithIngestionTime() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    Map<Integer, String> expectedFileContents = new HashMap<>();
    Map<String, Long> modTimes = new HashMap<>();

    // create the input files, remembering their modification time and expected content
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated.add(file.f0);
        modTimes.put(file.f0.getName(), hdfs.getFileStatus(file.f0).getModificationTime());
        expectedFileContents.put(i, file.f1);
    }

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

    final long watermarkInterval = 10;

    ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
    final OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> tester =
        new OneInputStreamOperatorTestHarness<>(reader);

    tester.getExecutionConfig().setAutoWatermarkInterval(watermarkInterval);
    tester.setTimeCharacteristic(TimeCharacteristic.IngestionTime);
    reader.setOutputType(typeInfo, tester.getExecutionConfig());

    tester.open();

    Assert.assertEquals(TimeCharacteristic.IngestionTime, tester.getTimeCharacteristic());

    // test that watermarks are correctly emitted
    ConcurrentLinkedQueue<Object> output = tester.getOutput();

    tester.setProcessingTime(201);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(200, ((Watermark) output.poll()).getTimestamp());

    tester.setProcessingTime(301);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(300, ((Watermark) output.poll()).getTimestamp());

    tester.setProcessingTime(401);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(400, ((Watermark) output.poll()).getTimestamp());

    tester.setProcessingTime(501);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(500, ((Watermark) output.poll()).getTimestamp());

    Assert.assertTrue(output.isEmpty());

    // create the necessary splits for the test
    FileInputSplit[] splits = format.createInputSplits(
        reader.getRuntimeContext().getNumberOfParallelSubtasks());

    // and feed them to the operator
    Map<Integer, List<String>> actualFileContents = new HashMap<>();

    long lastSeenWatermark = Long.MIN_VALUE;
    // counter for the lines read from the splits
    int lineCounter = 0;
    int watermarkCounter = 0;

    for (FileInputSplit split : splits) {

        // set the next "current processing time".
        long nextTimestamp = tester.getProcessingTime() + watermarkInterval;
        tester.setProcessingTime(nextTimestamp);

        // send the next split to be read and wait until it is fully read, the +1 is for the watermark.
        tester.processElement(new StreamRecord<>(new TimestampedFileInputSplit(
            modTimes.get(split.getPath().getName()),
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames())));

        // NOTE: this loop only exits once the output holds exactly LINES_PER_FILE + 1
        // elements, i.e. it presumably relies on each split covering one whole file;
        // otherwise it would wait forever.
        // BUT THIS IS JUST FOR THIS TEST
        while (tester.getOutput().isEmpty() || tester.getOutput().size() != (LINES_PER_FILE + 1)) {
            Thread.sleep(10);
        }

        // verify that the results are the expected
        for (Object line : tester.getOutput()) {
            if (line instanceof StreamRecord) {
                @SuppressWarnings("unchecked")
                StreamRecord<String> element = (StreamRecord<String>) line;
                lineCounter++;

                Assert.assertEquals(nextTimestamp, element.getTimestamp());

                // the first character of a line is the index of the file it came from
                int fileIdx = Character.getNumericValue(element.getValue().charAt(0));
                List<String> content = actualFileContents.get(fileIdx);
                if (content == null) {
                    content = new ArrayList<>();
                    actualFileContents.put(fileIdx, content);
                }
                content.add(element.getValue() + "\n");
            } else if (line instanceof Watermark) {
                long watermark = ((Watermark) line).getTimestamp();

                // the watermark is the timestamp rounded down to the watermark interval
                Assert.assertEquals(nextTimestamp - (nextTimestamp % watermarkInterval), watermark);
                Assert.assertTrue(watermark > lastSeenWatermark);
                watermarkCounter++;

                lastSeenWatermark = watermark;
            } else {
                Assert.fail("Unknown element in the list.");
            }
        }

        // clean the output to be ready for the next split
        tester.getOutput().clear();
    }

    // now we are processing one split after the other,
    // so all the elements must be here by now.
    Assert.assertEquals(NO_OF_FILES * LINES_PER_FILE, lineCounter);

    // because we expect one watermark per split.
    Assert.assertEquals(splits.length, watermarkCounter);

    // then close the reader gracefully so that the Long.MAX watermark is emitted
    synchronized (tester.getCheckpointLock()) {
        tester.close();
    }

    // remove the files created for this test
    for (org.apache.hadoop.fs.Path file : filesCreated) {
        hdfs.delete(file, false);
    }

    // check if the last element is the LongMax watermark (by now this must be the only element)
    Assert.assertEquals(1, tester.getOutput().size());
    Assert.assertTrue(tester.getOutput().peek() instanceof Watermark);
    Assert.assertEquals(Long.MAX_VALUE, ((Watermark) tester.getOutput().poll()).getTimestamp());

    // check if the elements are the expected ones.
    Assert.assertEquals(expectedFileContents.size(), actualFileContents.size());
    for (Integer fileIdx : expectedFileContents.keySet()) {
        Assert.assertTrue("file" + fileIdx + " not found", actualFileContents.containsKey(fileIdx));

        // restore the line-number order before comparing with the expected content
        List<String> cntnt = actualFileContents.get(fileIdx);
        Collections.sort(cntnt, new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // Integer.compare instead of subtraction: subtraction can overflow
                return Integer.compare(getLineNo(o1), getLineNo(o2));
            }
        });

        StringBuilder cntntStr = new StringBuilder();
        for (String line : cntnt) {
            cntntStr.append(line);
        }
        Assert.assertEquals(expectedFileContents.get(fileIdx), cntntStr.toString());
    }
}
use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
the class ContinuousFileProcessingTest method testReaderSnapshotRestore.
/**
 * Feeds four splits to a {@link ContinuousFileReaderOperator}, snapshots it, restores a second
 * operator from that snapshot, and verifies that both operators produce the same output and
 * that this output contains all four splits.
 */
@Test
public void testReaderSnapshotRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    final TimestampedFileInputSplit[] inputSplits = {
        new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null),
        new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null),
        new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null),
        new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null)
    };

    // Both operators share this latch, which is triggered only after the restore.
    final OneShotLatch readGate = new OneShotLatch();

    BlockingFileInputFormat firstFormat = new BlockingFileInputFormat(readGate, new Path(testBasePath));
    TypeInformation<FileInputSplit> outputType = TypeExtractor.getInputFormatTypes(firstFormat);

    ContinuousFileReaderOperator<FileInputSplit> firstReader = new ContinuousFileReaderOperator<>(firstFormat);
    firstReader.setOutputType(outputType, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> firstHarness =
        new OneInputStreamOperatorTestHarness<>(firstReader);
    firstHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    firstHarness.open();

    // Build up state in the first reader by handing it all the splits.
    for (TimestampedFileInputSplit pending : inputSplits) {
        firstHarness.processElement(new StreamRecord<>(pending));
    }

    // Snapshot the first operator under its checkpoint lock; the snapshot seeds the second
    // operator so that the outputs of the two can be compared.
    final OperatorStateHandles checkpoint;
    synchronized (firstHarness.getCheckpointLock()) {
        checkpoint = firstHarness.snapshot(0L, 0L);
    }

    BlockingFileInputFormat secondFormat = new BlockingFileInputFormat(readGate, new Path(testBasePath));
    ContinuousFileReaderOperator<FileInputSplit> secondReader = new ContinuousFileReaderOperator<>(secondFormat);
    secondReader.setOutputType(outputType, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> secondHarness =
        new OneInputStreamOperatorTestHarness<>(secondReader);
    secondHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    secondHarness.initializeState(checkpoint);
    secondHarness.open();

    // Let the computation start, then shut both operators down.
    readGate.trigger();

    synchronized (firstHarness.getCheckpointLock()) {
        firstHarness.close();
    }
    synchronized (secondHarness.getCheckpointLock()) {
        secondHarness.close();
    }

    // The first operator must have emitted every split (as a plain FileInputSplit) ...
    for (TimestampedFileInputSplit expected : inputSplits) {
        FileInputSplit plainSplit = createSplitFromTimestampedSplit(expected);
        Assert.assertTrue(firstHarness.getOutput().contains(new StreamRecord<>(plainSplit)));
    }

    // ... and the restored operator must have produced exactly the same output.
    Assert.assertArrayEquals(firstHarness.getOutput().toArray(), secondHarness.getOutput().toArray());
}
Aggregations