Example 1 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in the Apache Flink project.

Class ContinuousFileProcessingMigrationTest, method testReaderSnapshotRestore:

// END OF PREPARATIONS
// TESTS
@Test
public void testReaderSnapshotRestore() throws Exception {
    /*
		The commented-out block below was run against Flink 1.1 to generate the
		legacy snapshot ("reader-migration-test-flink1.1-snapshot") that this
		test restores further down.

		FileInputSplit split1 =
			new FileInputSplit(3, new Path("test/test1"), 0, 100, null);
		FileInputSplit split2 =
			new FileInputSplit(2, new Path("test/test2"), 101, 200, null);
		FileInputSplit split3 =
			new FileInputSplit(1, new Path("test/test2"), 0, 100, null);
		FileInputSplit split4 =
			new FileInputSplit(0, new Path("test/test3"), 0, 100, null);

		final OneShotLatch latch = new OneShotLatch();
		BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
		TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
		ContinuousFileReaderOperator<FileInputSplit, ?> initReader = new ContinuousFileReaderOperator<>(format);
		initReader.setOutputType(typeInfo, new ExecutionConfig());
		OneInputStreamOperatorTestHarness<FileInputSplit, FileInputSplit> initTestInstance =
			new OneInputStreamOperatorTestHarness<>(initReader);
		initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
		initTestInstance.open();
		// create some state in the reader
		initTestInstance.processElement(new StreamRecord<>(split1));
		initTestInstance.processElement(new StreamRecord<>(split2));
		initTestInstance.processElement(new StreamRecord<>(split3));
		initTestInstance.processElement(new StreamRecord<>(split4));
		// take a snapshot of the operator's state. This will be used
		// to initialize another reader and compare the results of the
		// two operators.
		final StreamTaskState snapshot;
		synchronized (initTestInstance.getCheckpointLock()) {
			snapshot = initTestInstance.snapshot(0L, 0L);
		}

		initTestInstance.snaphotToFile(snapshot, "src/test/resources/reader-migration-test-flink1.1-snapshot");

		*/
    TimestampedFileInputSplit split1 = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit split2 = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
    TimestampedFileInputSplit split3 = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit split4 = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);
    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
    TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
    ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
    initReader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initTestInstance = new OneInputStreamOperatorTestHarness<>(initReader);
    initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initTestInstance.setup();
    initTestInstance.initializeStateFromLegacyCheckpoint(getResourceFilename("reader-migration-test-flink1.1-snapshot"));
    initTestInstance.open();
    latch.trigger();
    synchronized (initTestInstance.getCheckpointLock()) {
        initTestInstance.close();
    }
    FileInputSplit fsSplit1 = createSplitFromTimestampedSplit(split1);
    FileInputSplit fsSplit2 = createSplitFromTimestampedSplit(split2);
    FileInputSplit fsSplit3 = createSplitFromTimestampedSplit(split3);
    FileInputSplit fsSplit4 = createSplitFromTimestampedSplit(split4);
    // verify that the output contains the expected splits, i.e. the ones
    // the legacy snapshot was taken with.
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit1)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit2)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit3)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit4)));
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) Test(org.junit.Test)
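
The helper createSplitFromTimestampedSplit is called in the test but not shown here. A minimal sketch of what it must do, given how its results are compared against the operator's output: strip the modification time from each TimestampedFileInputSplit and copy the remaining fields into a plain FileInputSplit (the constructor and getters used below exist on FileInputSplit and its superclass LocatableInputSplit).

private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    // TimestampedFileInputSplit extends FileInputSplit, so the getters are
    // inherited; only the modification timestamp is dropped.
    return new FileInputSplit(
        split.getSplitNumber(),
        split.getPath(),
        split.getStart(),
        split.getLength(),
        split.getHostnames());
}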

Example 2 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in the Apache Flink project.

Class AvroRecordInputFormatTest, method testDeserialisation:

/**
 * Tests that the AvroInputFormat properly reads data from an Avro file.
 */
@Test
public void testDeserialisation() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(1);
    assertEquals(1, splits.length);
    format.open(splits[0]);
    User u = format.nextRecord(null);
    assertNotNull(u);
    String name = u.getName().toString();
    assertNotNull("empty record", name);
    assertEquals("name not equal", TEST_NAME, name);
    // check arrays
    List<CharSequence> sl = u.getTypeArrayString();
    assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
    assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
    List<Boolean> bl = u.getTypeArrayBoolean();
    assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
    assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
    // check enums
    Colors enumValue = u.getTypeEnum();
    assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
    // check maps
    Map<CharSequence, Long> lm = u.getTypeMap();
    assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
    assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
    assertFalse("expecting second element", format.reachedEnd());
    assertNotNull("expecting second element", format.nextRecord(u));
    assertNull(format.nextRecord(u));
    assertTrue(format.reachedEnd());
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Colors(org.apache.flink.api.io.avro.generated.Colors) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)
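
The lifecycle this test exercises is the standard FileInputFormat read loop: configure, create splits, open a split, then call nextRecord until reachedEnd. A condensed sketch of that loop, using only calls shown above (the file path is hypothetical):

AvroInputFormat<User> format =
    new AvroInputFormat<>(new Path("/tmp/users.avro"), User.class); // illustrative path
format.configure(new Configuration());
FileInputSplit[] splits = format.createInputSplits(1);
format.open(splits[0]);
User reuse = null;
while (!format.reachedEnd()) {
    // nextRecord may reuse the passed-in object and may return null
    // once the split is exhausted
    User record = format.nextRecord(reuse);
    if (record != null) {
        reuse = record;
        // ... process record ...
    }
}
format.close();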

Example 3 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in the Apache Flink project.

Class AvroSplittableInputFormatTest, method testAvroRecoveryWithFailureAtStart:

@Test
public void testAvroRecoveryWithFailureAtStart() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        // simulate a failure before any record was read:
        // reopen the split with the format's initial state
        format.reopen(splits[i], format.getCurrentState());
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off.
                Tuple2<Long, Long> state = format.getCurrentState();
                // create a fresh format so that nothing lingers from the
                // previous instance (as would be the case after a real failure)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)
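
This test (and Example 5 below) depends on AvroInputFormat being checkpointable: getCurrentState() returns a Tuple2<Long, Long> describing the current read position, and reopen(split, state) resumes from that position, even on a freshly constructed format instance. A condensed sketch of the round trip, assuming the same testFile as above:

AvroInputFormat<User> format =
    new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
format.configure(new Configuration());
FileInputSplit[] splits = format.createInputSplits(1);
format.open(splits[0]);
format.nextRecord(null); // read at least one record before "checkpointing"
Tuple2<Long, Long> state = format.getCurrentState(); // capture the read position
format.close();
// a brand-new instance resumes exactly where the old one stopped;
// as in the tests above, no reconfigure is needed before reopen
format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
format.reopen(splits[0], state);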

Example 4 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in the Apache Flink project.

Class AvroSplittableInputFormatTest, method testSplittedIF:

@Test
public void testSplittedIF() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Example 5 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in the Apache Flink project.

Class AvroSplittableInputFormatTest, method testAvroRecovery:

@Test
public void testAvroRecovery() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off.
                Tuple2<Long, Long> state = format.getCurrentState();
                // create a fresh format so that nothing lingers from the
                // previous instance (as would be the case after a real failure)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Aggregations

FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 178
Test (org.junit.Test): 150
Configuration (org.apache.flink.configuration.Configuration): 119
Path (org.apache.flink.core.fs.Path): 72
IOException (java.io.IOException): 48
File (java.io.File): 47
Row (org.apache.flink.types.Row): 36
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 34
FileOutputStream (java.io.FileOutputStream): 28
OutputStreamWriter (java.io.OutputStreamWriter): 22
DoubleValue (org.apache.flink.types.DoubleValue): 18
IntValue (org.apache.flink.types.IntValue): 18
LongValue (org.apache.flink.types.LongValue): 18
StringValue (org.apache.flink.types.StringValue): 18
Value (org.apache.flink.types.Value): 18
ParseException (org.apache.flink.api.common.io.ParseException): 17
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 17
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 15
RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo): 15
Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 13