
Example 21 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

From the class ContinuousFileProcessingTest, the method testProcessOnce:

@Test
public void testProcessOnce() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    final OneShotLatch latch = new OneShotLatch();
    // create a single file in the directory
    Tuple2<org.apache.hadoop.fs.Path, String> bootstrap = createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
    Assert.assertTrue(hdfs.exists(bootstrap.f0));
    // the source is supposed to read only this file.
    final Set<String> filesToBeRead = new TreeSet<>();
    filesToBeRead.add(bootstrap.f0.getName());
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());
    final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
    final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction);
    final Thread t = new Thread() {

        @Override
        public void run() {
            try {
                monitoringFunction.open(new Configuration());
                monitoringFunction.run(context);
                // we would never arrive here if we were in
                // PROCESS_CONTINUOUSLY mode.
                // this will trigger the latch
                context.close();
            } catch (Exception e) {
                Assert.fail(e.getMessage());
            }
        }
    };
    t.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    // create some additional files that should be processed in the case of PROCESS_CONTINUOUSLY
    final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated[i] = ignoredFile.f0;
    }
    // wait until the monitoring thread exits
    t.join();
    Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());
    // finally delete the files created for the test.
    hdfs.delete(bootstrap.f0, false);
    for (org.apache.hadoop.fs.Path path : filesCreated) {
        hdfs.delete(path, false);
    }
}
Also used: Path (org.apache.flink.core.fs.Path), Configuration (org.apache.flink.configuration.Configuration), ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction), IOException (java.io.IOException), FileNotFoundException (java.io.FileNotFoundException), TextInputFormat (org.apache.flink.api.java.io.TextInputFormat), TreeSet (java.util.TreeSet), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), Test (org.junit.Test)
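The test above hinges on the trigger/await handoff between the monitoring thread and the test thread. Below is a minimal standalone sketch of that handoff, assuming only OneShotLatch's public trigger(), await(), and isTriggered() methods; the class and variable names are illustrative, not from the Flink test.

import org.apache.flink.core.testutils.OneShotLatch;

public class LatchHandoffSketch {

    public static void main(String[] args) throws Exception {
        final OneShotLatch latch = new OneShotLatch();

        // the worker signals the latch exactly once when its work is done
        Thread worker = new Thread(() -> {
            // ... the work under test would run here ...
            latch.trigger();
        });
        worker.start();

        // the test thread blocks until trigger() has been called; a trigger
        // that happens before await() is not lost, so the isTriggered()
        // pre-check seen in the tests above is only a fast path
        latch.await();
        worker.join();
    }
}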

Example 22 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

From the class ContinuousFileProcessingTest, the method testFunctionRestore:

@Test
public void testFunctionRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    org.apache.hadoop.fs.Path path = null;
    long fileModTime = Long.MIN_VALUE;
    for (int i = 0; i < 1; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        path = file.f0;
        fileModTime = hdfs.getFileStatus(file.f0).getModificationTime();
    }
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction);
    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();
    final Throwable[] error = new Throwable[1];
    final OneShotLatch latch = new OneShotLatch();
    final DummySourceContext sourceContext = new DummySourceContext() {

        @Override
        public void collect(TimestampedFileInputSplit element) {
            latch.trigger();
        }
    };
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                monitoringFunction.run(sourceContext);
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();
    // first condition for the source to have updated its state: emit at least one element
    if (!latch.isTriggered()) {
        latch.await();
    }
    // second condition for the source to have updated its state: it no longer
    // holds the checkpoint lock, which means it has processed all the splits
    // and updated its state.
    synchronized (sourceContext.getCheckpointLock()) {
        // acquiring the lock is the synchronization point; nothing to do inside
    }
    OperatorStateHandles snapshot = testHarness.snapshot(0, 0);
    monitoringFunction.cancel();
    runner.join();
    testHarness.close();
    final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy = new StreamSource<>(monitoringFunctionCopy);
    AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();
    Assert.assertNull(error[0]);
    Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());
    hdfs.delete(path, false);
}
Also used: Path (org.apache.flink.core.fs.Path), TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit), StreamSource (org.apache.flink.streaming.api.operators.StreamSource), ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction), AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness), OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), TextInputFormat (org.apache.flink.api.java.io.TextInputFormat), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), Test (org.junit.Test)
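A recurring detail in this test is how failures on the runner thread are surfaced: exceptions are stashed in a one-element array and asserted on after join(), since an exception thrown inside Thread.run() would otherwise vanish. A minimal sketch of that pattern, with the Flink-specific calls stubbed out and hypothetical names:

import org.apache.flink.core.testutils.OneShotLatch;

public class AsyncRunnerSketch {

    public static void main(String[] args) throws Exception {
        final OneShotLatch firstElement = new OneShotLatch();
        final Throwable[] error = new Throwable[1];

        Thread runner = new Thread(() -> {
            try {
                // stand-in for monitoringFunction.run(sourceContext); in the
                // real test the source context triggers the latch from collect()
                firstElement.trigger();
            } catch (Throwable t) {
                // stash the failure for the test thread to assert on later
                error[0] = t;
            }
        });
        runner.start();

        // wait until the source has emitted at least one element, i.e. until
        // it must have updated its internal state at least once
        firstElement.await();

        // ... snapshot, cancel, and join would happen here in the real test ...
        runner.join();

        if (error[0] != null) {
            throw new AssertionError("runner thread failed", error[0]);
        }
    }
}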

Example 23 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

From the class ContinuousFileProcessingTest, the method testNestedFilesProcessing:

@Test
public void testNestedFilesProcessing() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    final Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    final Set<String> filesToBeRead = new TreeSet<>();
    // create two nested directories
    org.apache.hadoop.fs.Path firstLevelDir = new org.apache.hadoop.fs.Path(testBasePath + "/" + "firstLevelDir");
    org.apache.hadoop.fs.Path secondLevelDir = new org.apache.hadoop.fs.Path(testBasePath + "/" + "firstLevelDir" + "/" + "secondLevelDir");
    Assert.assertFalse(hdfs.exists(firstLevelDir));
    hdfs.mkdirs(firstLevelDir);
    hdfs.mkdirs(secondLevelDir);
    // create files in the base dir, the first level dir and the second level dir
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "firstLevelFile", i, "This is test line.");
        filesCreated.add(file.f0);
        filesToBeRead.add(file.f0.getName());
    }
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(firstLevelDir.toString(), "secondLevelFile", i, "This is test line.");
        filesCreated.add(file.f0);
        filesToBeRead.add(file.f0.getName());
    }
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(secondLevelDir.toString(), "thirdLevelFile", i, "This is test line.");
        filesCreated.add(file.f0);
        filesToBeRead.add(file.f0.getName());
    }
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());
    format.setNestedFileEnumeration(true);
    ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
    final FileVerifyingSourceContext context = new FileVerifyingSourceContext(new OneShotLatch(), monitoringFunction);
    monitoringFunction.open(new Configuration());
    monitoringFunction.run(context);
    Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());
    // finally delete the dirs and the files created for the test.
    for (org.apache.hadoop.fs.Path file : filesCreated) {
        hdfs.delete(file, false);
    }
    hdfs.delete(secondLevelDir, false);
    hdfs.delete(firstLevelDir, false);
}
Also used: Path (org.apache.flink.core.fs.Path), Configuration (org.apache.flink.configuration.Configuration), ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction), TextInputFormat (org.apache.flink.api.java.io.TextInputFormat), TreeSet (java.util.TreeSet), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), HashSet (java.util.HashSet), Test (org.junit.Test)
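setNestedFileEnumeration(true) is what makes the monitor descend into firstLevelDir and secondLevelDir rather than listing only the base directory. As a rough illustration of the traversal it enables, here is the equivalent recursive walk using java.nio on a local path rather than HDFS; the directory name is hypothetical:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;

public class NestedEnumerationSketch {

    public static void main(String[] args) throws IOException {
        // hypothetical local stand-in for the HDFS test directory
        Path base = Paths.get("/tmp/testBasePath");

        // Files.walk descends recursively, mirroring what nested file
        // enumeration asks the input format to do: visit base/,
        // base/firstLevelDir/ and base/firstLevelDir/secondLevelDir/
        try (Stream<Path> paths = Files.walk(base)) {
            paths.filter(Files::isRegularFile)
                 .forEach(f -> System.out.println("would read: " + f.getFileName()));
        }
    }
}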

Example 24 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

From the class ContinuousFileProcessingTest, the method testFilePathFiltering:

//// Monitoring Function Tests ////
@Test
public void testFilePathFiltering() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    Set<String> filesKept = new TreeSet<>();
    // create the files to be discarded
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "**file", i, "This is test line.");
        filesCreated.add(file.f0);
    }
    // create the files to be kept
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated.add(file.f0);
        filesKept.add(file.f0.getName());
    }
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(new FilePathFilter() {

        private static final long serialVersionUID = 2611449927338589804L;

        @Override
        public boolean filterPath(Path filePath) {
            return filePath.getName().startsWith("**");
        }
    });
    ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
    final FileVerifyingSourceContext context = new FileVerifyingSourceContext(new OneShotLatch(), monitoringFunction);
    monitoringFunction.open(new Configuration());
    monitoringFunction.run(context);
    Assert.assertArrayEquals(filesKept.toArray(), context.getSeenFiles().toArray());
    // finally delete the files created for the test.
    for (org.apache.hadoop.fs.Path file : filesCreated) {
        hdfs.delete(file, false);
    }
}
Also used: Path (org.apache.flink.core.fs.Path), Configuration (org.apache.flink.configuration.Configuration), ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction), TextInputFormat (org.apache.flink.api.java.io.TextInputFormat), TreeSet (java.util.TreeSet), FilePathFilter (org.apache.flink.api.common.io.FilePathFilter), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), HashSet (java.util.HashSet), Test (org.junit.Test)
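The anonymous FilePathFilter above returns true for paths that should be skipped. If the same exclusion were needed in several tests, it could be lifted into a small reusable class; a sketch under the assumption that extending FilePathFilter and overriding filterPath(Path), as the example does, is all that is required (the class name is hypothetical):

import org.apache.flink.api.common.io.FilePathFilter;
import org.apache.flink.core.fs.Path;

// a reusable variant of the anonymous filter above; note the inverted sense:
// filterPath(...) returning true means the path is EXCLUDED from processing
public class PrefixExcludingFilter extends FilePathFilter {

    private static final long serialVersionUID = 1L;

    private final String excludedPrefix;

    public PrefixExcludingFilter(String excludedPrefix) {
        this.excludedPrefix = excludedPrefix;
    }

    @Override
    public boolean filterPath(Path filePath) {
        return filePath != null && filePath.getName().startsWith(excludedPrefix);
    }
}

Wiring it in as format.setFilesFilter(new PrefixExcludingFilter("**")) would then be equivalent to the inline version in the test.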

Example 25 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

From the class RocksDBAsyncSnapshotTest, the method testCancelFullyAsyncCheckpoints:

/**
 * This test ensures that canceling asynchronous snapshots works as expected and does not block.
	 * @throws Exception
	 */
@Test
@Ignore
public void testCancelFullyAsyncCheckpoints() throws Exception {
    LocalFileSystem localFS = new LocalFileSystem();
    localFS.initialize(new URI("file:///"), new Configuration());
    PowerMockito.stub(PowerMockito.method(FileSystem.class, "get", URI.class, Configuration.class)).toReturn(localFS);
    final OneInputStreamTask<String, String> task = new OneInputStreamTask<>();
    final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<>(task, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    testHarness.setupOutputForSingletonOperatorChain();
    testHarness.configureForKeyedStream(new KeySelector<String, String>() {

        @Override
        public String getKey(String value) throws Exception {
            return value;
        }
    }, BasicTypeInfo.STRING_TYPE_INFO);
    StreamConfig streamConfig = testHarness.getStreamConfig();
    File dbDir = new File(new File(ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH, UUID.randomUUID().toString()), "state");
    BlockingStreamMemoryStateBackend memoryStateBackend = new BlockingStreamMemoryStateBackend();
    RocksDBStateBackend backend = new RocksDBStateBackend(memoryStateBackend);
    backend.setDbStoragePath(dbDir.getAbsolutePath());
    streamConfig.setStateBackend(backend);
    streamConfig.setStreamOperator(new AsyncCheckpointOperator());
    StreamMockEnvironment mockEnv = new StreamMockEnvironment(testHarness.jobConfig, testHarness.taskConfig, testHarness.memorySize, new MockInputSplitProvider(), testHarness.bufferSize);
    BlockingStreamMemoryStateBackend.waitFirstWriteLatch = new OneShotLatch();
    BlockingStreamMemoryStateBackend.unblockCancelLatch = new OneShotLatch();
    testHarness.invoke(mockEnv);
    // wait for the task to be running
    for (Field field : StreamTask.class.getDeclaredFields()) {
        if (field.getName().equals("isRunning")) {
            field.setAccessible(true);
            while (!field.getBoolean(task)) {
                Thread.sleep(10);
            }
        }
    }
    task.triggerCheckpoint(new CheckpointMetaData(42, 17), CheckpointOptions.forFullCheckpoint());
    testHarness.processElement(new StreamRecord<>("Wohoo", 0));
    BlockingStreamMemoryStateBackend.waitFirstWriteLatch.await();
    task.cancel();
    BlockingStreamMemoryStateBackend.unblockCancelLatch.trigger();
    testHarness.endInput();
    try {
        ExecutorService threadPool = task.getAsyncOperationsThreadPool();
        threadPool.shutdown();
        Assert.assertTrue(threadPool.awaitTermination(60_000, TimeUnit.MILLISECONDS));
        testHarness.waitForTaskCompletion();
        if (mockEnv.wasFailedExternally()) {
            throw new AsynchronousException(new InterruptedException("Exception was thrown as expected."));
        }
        fail("Operation completed. Cancel failed.");
    } catch (Exception expected) {
        AsynchronousException asynchronousException = null;
        if (expected instanceof AsynchronousException) {
            asynchronousException = (AsynchronousException) expected;
        } else if (expected.getCause() instanceof AsynchronousException) {
            asynchronousException = (AsynchronousException) expected.getCause();
        } else {
            fail("Unexpected exception: " + expected);
        }
        // we expect the exception from canceling snapshots
        Throwable innerCause = asynchronousException.getCause();
        Assert.assertTrue("Unexpected inner cause: " + innerCause, //future canceled
        innerCause instanceof CancellationException || //thread interrupted
        innerCause instanceof InterruptedException);
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), OneInputStreamTask (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask), Matchers.anyString (org.mockito.Matchers.anyString), URI (java.net.URI), Field (java.lang.reflect.Field), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), StreamMockEnvironment (org.apache.flink.streaming.runtime.tasks.StreamMockEnvironment), MockInputSplitProvider (org.apache.flink.runtime.operators.testutils.MockInputSplitProvider), StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig), AsynchronousException (org.apache.flink.streaming.runtime.tasks.AsynchronousException), CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData), CancellationException (java.util.concurrent.CancellationException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), OneInputStreamTaskTestHarness (org.apache.flink.streaming.runtime.tasks.OneInputStreamTaskTestHarness), ExecutorService (java.util.concurrent.ExecutorService), File (java.io.File), PowerMockIgnore (org.powermock.core.classloader.annotations.PowerMockIgnore), Ignore (org.junit.Ignore), PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest), Test (org.junit.Test)
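The two static latches on BlockingStreamMemoryStateBackend implement a rendezvous: the snapshot thread announces its first write, then parks until the test has issued the cancel, so the cancellation provably races with an in-flight write. A simplified sketch of that choreography with plain threads; the names are stand-ins and interrupt() takes the place of task.cancel():

import org.apache.flink.core.testutils.OneShotLatch;

public class TwoLatchCancelSketch {

    // hypothetical stand-ins for BlockingStreamMemoryStateBackend's static latches
    static final OneShotLatch waitFirstWrite = new OneShotLatch();
    static final OneShotLatch unblockCancel = new OneShotLatch();

    public static void main(String[] args) throws Exception {
        Thread snapshotThread = new Thread(() -> {
            try {
                // 1. announce that the snapshot has started writing ...
                waitFirstWrite.trigger();
                // 2. ... then park until the test has issued the cancel
                unblockCancel.await();
            } catch (InterruptedException e) {
                // being interrupted here is the expected outcome of cancellation
                Thread.currentThread().interrupt();
            }
        });
        snapshotThread.start();

        waitFirstWrite.await();     // the snapshot is now mid-write
        snapshotThread.interrupt(); // stand-in for task.cancel()
        unblockCancel.trigger();    // let the writer resume and observe the interrupt
        snapshotThread.join();
    }
}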

Aggregations

OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 38 usages
Test (org.junit.Test): 34 usages
Configuration (org.apache.flink.configuration.Configuration): 10 usages
Path (org.apache.flink.core.fs.Path): 9 usages
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 9 usages
StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig): 8 usages
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 8 usages
CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData): 5 usages
Environment (org.apache.flink.runtime.execution.Environment): 5 usages
TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit): 5 usages
OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles): 5 usages
IOException (java.io.IOException): 4 usages
TreeSet (java.util.TreeSet): 4 usages
AtomicReference (java.util.concurrent.atomic.AtomicReference): 4 usages
TextInputFormat (org.apache.flink.api.java.io.TextInputFormat): 4 usages
FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 4 usages
CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics): 4 usages
SubtaskState (org.apache.flink.runtime.checkpoint.SubtaskState): 4 usages
ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction): 4 usages
File (java.io.File): 3 usages