Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class ContinuousFileProcessingMigrationTest, method testReaderSnapshotRestore.
// END OF PREPARATIONS

// TESTS
@Test
public void testReaderSnapshotRestore() throws Exception {
    /*
    FileInputSplit split1 =
        new FileInputSplit(3, new Path("test/test1"), 0, 100, null);
    FileInputSplit split2 =
        new FileInputSplit(2, new Path("test/test2"), 101, 200, null);
    FileInputSplit split3 =
        new FileInputSplit(1, new Path("test/test2"), 0, 100, null);
    FileInputSplit split4 =
        new FileInputSplit(0, new Path("test/test3"), 0, 100, null);

    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
    TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);

    ContinuousFileReaderOperator<FileInputSplit, ?> initReader = new ContinuousFileReaderOperator<>(format);
    initReader.setOutputType(typeInfo, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<FileInputSplit, FileInputSplit> initTestInstance =
        new OneInputStreamOperatorTestHarness<>(initReader);
    initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initTestInstance.open();

    // create some state in the reader
    initTestInstance.processElement(new StreamRecord<>(split1));
    initTestInstance.processElement(new StreamRecord<>(split2));
    initTestInstance.processElement(new StreamRecord<>(split3));
    initTestInstance.processElement(new StreamRecord<>(split4));

    // take a snapshot of the operator's state. This will be used
    // to initialize another reader and compare the results of the
    // two operators.
    final StreamTaskState snapshot;
    synchronized (initTestInstance.getCheckpointLock()) {
        snapshot = initTestInstance.snapshot(0L, 0L);
    }
    initTestInstance.snaphotToFile(snapshot, "src/test/resources/reader-migration-test-flink1.1-snapshot");
    */

    TimestampedFileInputSplit split1 = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit split2 = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
    TimestampedFileInputSplit split3 = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit split4 = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
    TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);

    ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
    initReader.setOutputType(typeInfo, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initTestInstance =
        new OneInputStreamOperatorTestHarness<>(initReader);
    initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initTestInstance.setup();
    initTestInstance.initializeStateFromLegacyCheckpoint(getResourceFilename("reader-migration-test-flink1.1-snapshot"));
    initTestInstance.open();

    latch.trigger();

    synchronized (initTestInstance.getCheckpointLock()) {
        initTestInstance.close();
    }

    FileInputSplit fsSplit1 = createSplitFromTimestampedSplit(split1);
    FileInputSplit fsSplit2 = createSplitFromTimestampedSplit(split2);
    FileInputSplit fsSplit3 = createSplitFromTimestampedSplit(split3);
    FileInputSplit fsSplit4 = createSplitFromTimestampedSplit(split4);

    // verify that the restored reader emitted the expected splits
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit1)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit2)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit3)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit4)));
}
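An aside, not part of the test above: the only difference between the two split types appearing in this test is the leading modification-time field, which is what lets pending splits be ordered and checkpointed by the continuous file reader. A minimal sketch of both constructors as the test uses them; SplitSketch and the paths are made up for illustration, and the accessor names are from the Flink 1.x streaming API:

import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit;

public class SplitSketch {
    public static void main(String[] args) {
        // plain FileInputSplit: split number, file, byte offset, length, hosts (null = unknown)
        FileInputSplit plain = new FileInputSplit(3, new Path("test/test1"), 0, 100, null);

        // TimestampedFileInputSplit adds the file's modification time as the first
        // argument; the class is Comparable on it, so pending splits can be ordered
        TimestampedFileInputSplit stamped =
            new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);

        System.out.println(plain.getSplitNumber() + " @ " + plain.getPath());
        System.out.println(stamped.getModificationTime() + " / " + stamped.getSplitNumber());
    }
}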
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class AvroRecordInputFormatTest, method testDeserialisation.
/**
 * Tests that the AvroInputFormat properly reads data from an Avro file.
 *
 * @throws IOException
 */
@Test
public void testDeserialisation() throws IOException {
    Configuration parameters = new Configuration();

    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(1);
    assertEquals(1, splits.length);
    format.open(splits[0]);

    User u = format.nextRecord(null);
    assertNotNull(u);

    String name = u.getName().toString();
    assertNotNull("empty record", name);
    assertEquals("name not equal", TEST_NAME, name);

    // check arrays
    List<CharSequence> sl = u.getTypeArrayString();
    assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
    assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());

    List<Boolean> bl = u.getTypeArrayBoolean();
    assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
    assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));

    // check enums
    Colors enumValue = u.getTypeEnum();
    assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);

    // check maps
    Map<CharSequence, Long> lm = u.getTypeMap();
    assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
    assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());

    assertFalse("expecting second element", format.reachedEnd());
    assertNotNull("expecting second element", format.nextRecord(u));
    assertNull(format.nextRecord(u));
    assertTrue(format.reachedEnd());

    format.close();
}
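The scaffolding around these assertions is the generic InputFormat lifecycle: configure, create splits, open a split, loop on reachedEnd/nextRecord, close. A condensed sketch of just that loop; AvroReadLoop is a made-up name, and note that AvroInputFormat moved from org.apache.flink.api.java.io to org.apache.flink.formats.avro in later Flink versions:

import org.apache.flink.api.java.io.AvroInputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

public class AvroReadLoop {
    // counts the records in an Avro file by reading every split sequentially
    static <T> long countRecords(Path file, Class<T> type) throws Exception {
        AvroInputFormat<T> format = new AvroInputFormat<>(file, type);
        format.configure(new Configuration());

        long count = 0;
        for (FileInputSplit split : format.createInputSplits(1)) {
            format.open(split);
            while (!format.reachedEnd()) {
                // passing null asks the format to create a fresh record
                if (format.nextRecord(null) != null) {
                    count++;
                }
            }
            format.close();
        }
        return count;
    }
}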
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class AvroSplittableInputFormatTest, method testAvroRecoveryWithFailureAtStart.
@Test
public void testAvroRecoveryWithFailureAtStart() throws Exception {
    final int recordsUntilCheckpoint = 132;

    Configuration parameters = new Configuration();

    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);

    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);

    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.reopen(splits[i], format.getCurrentState());
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;

            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off
                Tuple2<Long, Long> state = format.getCurrentState();

                // use a fresh format so that no state from the previous one leaks over
                // (as would be the case in a real restore)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }

    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
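Stripped of the per-split bookkeeping, the handoff this test (and testAvroRecovery below) exercises is: snapshot the reading position with getCurrentState(), discard the format, build a fresh one, and hand the split plus the saved state to reopen(). A sketch of just that pattern under the same assumptions as the read-loop sketch above; AvroResumeSketch and its parameters are placeholders:

import org.apache.flink.api.java.io.AvroInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

public class AvroResumeSketch {
    static <T> void readWithRestart(Path file, Class<T> type, int recordsBeforeFailure) throws Exception {
        AvroInputFormat<T> format = new AvroInputFormat<>(file, type);
        format.configure(new Configuration());
        FileInputSplit split = format.createInputSplits(1)[0];
        format.open(split);

        for (int i = 0; i < recordsBeforeFailure && !format.reachedEnd(); i++) {
            format.nextRecord(null);
        }

        // snapshot: position of the last Avro sync point and the records read past it
        Tuple2<Long, Long> state = format.getCurrentState();
        format.close();

        // "failure": throw the old format away, then resume from the snapshot
        AvroInputFormat<T> restored = new AvroInputFormat<>(file, type);
        restored.reopen(split, state);
        while (!restored.reachedEnd()) {
            restored.nextRecord(null);
        }
        restored.close();
    }
}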
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class AvroSplittableInputFormatTest, method testSplittedIF.
@Test
public void testSplittedIF() throws IOException {
    Configuration parameters = new Configuration();

    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);

    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);

    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            elementsPerSplit[i]++;
        }
        format.close();
    }

    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class AvroSplittableInputFormatTest, method testAvroRecovery.
@Test
public void testAvroRecovery() throws Exception {
    final int recordsUntilCheckpoint = 132;

    Configuration parameters = new Configuration();

    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);

    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);

    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;

            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off
                Tuple2<Long, Long> state = format.getCurrentState();

                // use a fresh format so that no state from the previous one leaks over
                // (as would be the case in a real restore)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }

    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
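For context, not part of the tests: in a real job these pieces are wired together by StreamExecutionEnvironment#readFile, where a monitoring task emits splits like the ones above and one ContinuousFileReaderOperator instance per parallel subtask reads them. A minimal sketch, with a placeholder input path and a made-up job name:

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

public class ContinuousReadSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        String path = "hdfs:///tmp/input"; // placeholder
        DataStream<String> lines = env.readFile(
            new TextInputFormat(new Path(path)),
            path,
            FileProcessingMode.PROCESS_CONTINUOUSLY, // keep re-scanning the directory
            1000L);                                  // scan interval in milliseconds

        lines.print();
        env.execute("continuous file reading sketch");
    }
}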