Example 6 with OutputFileConfig

Use of org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig in project flink by apache.

The class FileWriterTest, method testNumberRecordsOutCounter:

@Test
public void testNumberRecordsOutCounter() throws IOException, InterruptedException {
    final OperatorIOMetricGroup operatorIOMetricGroup = UnregisteredMetricGroups.createUnregisteredOperatorMetricGroup().getIOMetricGroup();
    File outDir = TEMP_FOLDER.newFolder();
    Path path = new Path(outDir.toURI());
    Counter recordsCounter = operatorIOMetricGroup.getNumRecordsOutCounter();
    SinkWriter.Context context = new ContextImpl();
    FileWriter<String> fileWriter = createWriter(path, DefaultRollingPolicy.builder().build(), new OutputFileConfig("part-", ""), operatorIOMetricGroup);
    assertEquals(0, recordsCounter.getCount());
    fileWriter.write("1", context);
    assertEquals(1, recordsCounter.getCount());
    fileWriter.write("2", context);
    fileWriter.write("3", context);
    assertEquals(3, recordsCounter.getCount());
}
Also used: Path (org.apache.flink.core.fs.Path), OutputFileConfig (org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig), Counter (org.apache.flink.metrics.Counter), OperatorIOMetricGroup (org.apache.flink.metrics.groups.OperatorIOMetricGroup), SinkWriter (org.apache.flink.api.connector.sink2.SinkWriter), File (java.io.File), Test (org.junit.Test)
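
Outside of the test harness above, an OutputFileConfig is normally built through its builder and passed to a FileSink builder. The following is a minimal sketch based on the documented FileSink row-format API; the base path, the "part"/".txt" prefix and suffix, and the class name OutputFileConfigSketch are illustrative placeholders, not taken from FileWriterTest.

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.connector.file.sink.FileSink;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;

public class OutputFileConfigSketch {

    // Builds a row-format FileSink whose finished part files are named
    // roughly part-<subtaskIndex>-<counter>.txt. The base path is a placeholder.
    static FileSink<String> buildSink() {
        OutputFileConfig fileConfig = OutputFileConfig.builder()
                .withPartPrefix("part")
                .withPartSuffix(".txt")
                .build();

        return FileSink
                .forRowFormat(new Path("/tmp/flink-output"), new SimpleStringEncoder<String>("UTF-8"))
                .withRollingPolicy(DefaultRollingPolicy.builder().build())
                .withOutputFileConfig(fileConfig)
                .build();
    }
}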

Example 7 with OutputFileConfig

Use of org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig in project flink by apache.

The class FileWriterTest, method testSnapshotAndRestore:

@Test
public void testSnapshotAndRestore() throws Exception {
    File outDir = TEMP_FOLDER.newFolder();
    Path path = new Path(outDir.toURI());
    FileWriter<String> fileWriter = createWriter(path, DefaultRollingPolicy.builder().build(), new OutputFileConfig("part-", ""));
    fileWriter.write("test1", new ContextImpl());
    fileWriter.write("test2", new ContextImpl());
    fileWriter.write("test3", new ContextImpl());
    assertEquals(3, fileWriter.getActiveBuckets().size());
    fileWriter.prepareCommit();
    List<FileWriterBucketState> states = fileWriter.snapshotState(1L);
    assertEquals(3, states.size());
    fileWriter = restoreWriter(states, path, OnCheckpointRollingPolicy.build(), new OutputFileConfig("part-", ""));
    assertEquals(fileWriter.getActiveBuckets().keySet(), new HashSet<>(Arrays.asList("test1", "test2", "test3")));
    for (FileWriterBucket<String> bucket : fileWriter.getActiveBuckets().values()) {
        assertNotNull("The in-progress file should be recovered", bucket.getInProgressPart());
    }
}
Also used: Path (org.apache.flink.core.fs.Path), OutputFileConfig (org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig), File (java.io.File), Test (org.junit.Test)
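
For context, the prepareCommit()/snapshotState() sequence that this test drives by hand is what checkpointing triggers in a running job. Below is a minimal sketch of that wiring, assuming a local pipeline: the StreamExecutionEnvironment setup, the ten-second checkpoint interval, and the /tmp/flink-output path are assumptions for illustration, not part of FileWriterTest.

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.connector.file.sink.FileSink;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;

public class CheckpointedFileSinkSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Each checkpoint triggers prepareCommit()/snapshotState() on the FileWriter,
        // which is the code path the test above exercises directly.
        env.enableCheckpointing(10_000L);

        env.fromElements("test1", "test2", "test3")
                .sinkTo(FileSink
                        .forRowFormat(new Path("/tmp/flink-output"), new SimpleStringEncoder<String>("UTF-8"))
                        .withOutputFileConfig(new OutputFileConfig("part-", ""))
                        .build());

        env.execute("checkpointed-file-sink-sketch");
    }
}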

Example 8 with OutputFileConfig

Use of org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig in project flink by apache.

The class FileWriterTest, method testBucketIsRemovedWhenNotActive:

@Test
public void testBucketIsRemovedWhenNotActive() throws Exception {
    File outDir = TEMP_FOLDER.newFolder();
    Path path = new Path(outDir.toURI());
    FileWriter<String> fileWriter = createWriter(path, OnCheckpointRollingPolicy.build(), new OutputFileConfig("part-", ""));
    fileWriter.write("test", new ContextImpl());
    fileWriter.prepareCommit();
    fileWriter.snapshotState(1L);
    // With no more records, another call to prepareCommit makes the bucket inactive
    fileWriter.prepareCommit();
    assertTrue(fileWriter.getActiveBuckets().isEmpty());
}
Also used: Path (org.apache.flink.core.fs.Path), OutputFileConfig (org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig), File (java.io.File), Test (org.junit.Test)

Example 9 with OutputFileConfig

Use of org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig in project flink by apache.

The class FileWriterTest, method testOnProcessingTime:

@Test
public void testOnProcessingTime() throws Exception {
    File outDir = TEMP_FOLDER.newFolder();
    Path path = new Path(outDir.toURI());
    // Create the processing time service, starting at time 10.
    ManuallyTriggeredProcessingTimeService processingTimeService = new ManuallyTriggeredProcessingTimeService();
    processingTimeService.advanceTo(10);
    FileWriter<String> fileWriter = createWriter(
            path,
            new FileSinkTestUtils.StringIdentityBucketAssigner(),
            DefaultRollingPolicy.builder().withRolloverInterval(Duration.ofMillis(10)).build(),
            new OutputFileConfig("part-", ""),
            processingTimeService,
            5);
    fileWriter.initializeState(Collections.emptyList());
    // On startup the writer registers a bucket-inspection timer at time 15 (start time 10 + check interval 5).
    fileWriter.write("test1", new ContextImpl());
    processingTimeService.advanceTo(15);
    fileWriter.write("test2", new ContextImpl());
    processingTimeService.advanceTo(20);
    FileWriterBucket<String> test1Bucket = fileWriter.getActiveBuckets().get("test1");
    assertNull("The in-progress part of test1 should be rolled", test1Bucket.getInProgressPart());
    assertEquals(1, test1Bucket.getPendingFiles().size());
    FileWriterBucket<String> test2Bucket = fileWriter.getActiveBuckets().get("test2");
    assertNotNull("The in-progress part of test2 should not be rolled", test2Bucket.getInProgressPart());
    assertEquals(0, test2Bucket.getPendingFiles().size());
    // Close, pre-commit & clear all the pending records.
    processingTimeService.advanceTo(30);
    fileWriter.prepareCommit();
    // Test timer re-registration.
    fileWriter.write("test1", new ContextImpl());
    processingTimeService.advanceTo(35);
    fileWriter.write("test2", new ContextImpl());
    processingTimeService.advanceTo(40);
    test1Bucket = fileWriter.getActiveBuckets().get("test1");
    assertNull("The in-progress part of test1 should be rolled", test1Bucket.getInProgressPart());
    assertEquals(1, test1Bucket.getPendingFiles().size());
    test2Bucket = fileWriter.getActiveBuckets().get("test2");
    assertNotNull("The in-progress part of test2 should not be rolled", test2Bucket.getInProgressPart());
    assertEquals(0, test2Bucket.getPendingFiles().size());
}
Also used: Path (org.apache.flink.core.fs.Path), OutputFileConfig (org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig), FileSinkTestUtils (org.apache.flink.connector.file.sink.utils.FileSinkTestUtils), File (java.io.File), Test (org.junit.Test)
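
The ten-millisecond rollover interval above is only there so the test can drive a manual processing time service; a production job would configure the same DefaultRollingPolicy builder with coarser thresholds. A sketch, where the 15-minute/5-minute intervals and the 128 MiB size cap are arbitrary illustrative values, not defaults:

import java.time.Duration;

import org.apache.flink.configuration.MemorySize;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;

public class RollingPolicySketch {

    // Rolls the in-progress part file once it is 15 minutes old, has received
    // no writes for 5 minutes, or exceeds 128 MiB. Values are illustrative only.
    static DefaultRollingPolicy<String, String> buildPolicy() {
        return DefaultRollingPolicy.builder()
                .withRolloverInterval(Duration.ofMinutes(15))
                .withInactivityInterval(Duration.ofMinutes(5))
                .withMaxPartSize(MemorySize.ofMebiBytes(128))
                .build();
    }
}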

Example 10 with OutputFileConfig

Use of org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig in project flink by apache.

The class FileWriterTest, method testMergingForRescaling:

@Test
public void testMergingForRescaling() throws Exception {
    File outDir = TEMP_FOLDER.newFolder();
    Path path = new Path(outDir.toURI());
    FileWriter<String> firstFileWriter = createWriter(path, DefaultRollingPolicy.builder().build(), new OutputFileConfig("part-", ""));
    firstFileWriter.write("test1", new ContextImpl());
    firstFileWriter.write("test2", new ContextImpl());
    firstFileWriter.write("test3", new ContextImpl());
    firstFileWriter.prepareCommit();
    List<FileWriterBucketState> firstState = firstFileWriter.snapshotState(1L);
    FileWriter<String> secondFileWriter = createWriter(path, DefaultRollingPolicy.builder().build(), new OutputFileConfig("part-", ""));
    secondFileWriter.write("test1", new ContextImpl());
    secondFileWriter.write("test2", new ContextImpl());
    secondFileWriter.prepareCommit();
    List<FileWriterBucketState> secondState = secondFileWriter.snapshotState(1L);
    List<FileWriterBucketState> mergedState = new ArrayList<>();
    mergedState.addAll(firstState);
    mergedState.addAll(secondState);
    FileWriter<String> restoredWriter = restoreWriter(mergedState, path, DefaultRollingPolicy.builder().build(), new OutputFileConfig("part-", ""));
    assertEquals(3, restoredWriter.getActiveBuckets().size());
    // Merged buckets
    for (String bucketId : Arrays.asList("test1", "test2")) {
        FileWriterBucket<String> bucket = restoredWriter.getActiveBuckets().get(bucketId);
        assertNotNull("The in-progress file should be recovered", bucket.getInProgressPart());
        assertEquals(1, bucket.getPendingFiles().size());
    }
    // Not merged buckets
    for (String bucketId : Collections.singletonList("test3")) {
        FileWriterBucket<String> bucket = restoredWriter.getActiveBuckets().get(bucketId);
        assertNotNull("The in-progress file should be recovered", bucket.getInProgressPart());
        assertEquals(0, bucket.getPendingFiles().size());
    }
}
Also used: Path (org.apache.flink.core.fs.Path), OutputFileConfig (org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig), ArrayList (java.util.ArrayList), File (java.io.File), Test (org.junit.Test)

Aggregations

OutputFileConfig (org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig) 11
File (java.io.File) 7
Path (org.apache.flink.core.fs.Path) 7
Test (org.junit.Test) 6
PartitionCommitInfo (org.apache.flink.connector.file.table.stream.PartitionCommitInfo) 3
HiveWriterFactory (org.apache.flink.connectors.hive.write.HiveWriterFactory) 3
RowData (org.apache.flink.table.data.RowData) 3
IOException (java.io.IOException) 2
UncheckedIOException (java.io.UncheckedIOException) 2
Properties (java.util.Properties) 2
FileSystemTableSink (org.apache.flink.connector.file.table.FileSystemTableSink) 2
TableBucketAssigner (org.apache.flink.connector.file.table.FileSystemTableSink.TableBucketAssigner) 2
CompactReader (org.apache.flink.connector.file.table.stream.compact.CompactReader) 2
HiveCompactReaderFactory (org.apache.flink.connectors.hive.read.HiveCompactReaderFactory) 2
HiveBulkWriterFactory (org.apache.flink.connectors.hive.write.HiveBulkWriterFactory) 2
HiveOutputFormatFactory (org.apache.flink.connectors.hive.write.HiveOutputFormatFactory) 2
ThreadLocalClassLoaderConfiguration (org.apache.flink.orc.writer.ThreadLocalClassLoaderConfiguration) 2
CatalogTable (org.apache.flink.table.catalog.CatalogTable) 2
CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException) 2
HiveMetastoreClientFactory (org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory) 2