Search in sources :

Example 21 with FSWindowDataManager

use of org.apache.apex.malhar.lib.wal.FSWindowDataManager in project apex-malhar by apache.

the class FileSplitterInputTest method testFirstWindowAfterRecovery.

/**
 * Checks what the splitter emits in the first window that follows recovery (window 9),
 * after extra lines have been appended to {@code file13.txt}.
 *
 * @throws IOException if appending to the data file fails
 * @throws InterruptedException if the sleep or the scanner semaphore wait is interrupted
 */
@Test
public void testFirstWindowAfterRecovery() throws IOException, InterruptedException {
    FSWindowDataManager windowDataManager = new FSWindowDataManager();
    testMeta.updateConfig(windowDataManager, 500, 2L, 10);
    testMeta.fileSplitterInput.setup(testMeta.context);

    // Shared recovery check defined elsewhere in this test class.
    validateRecovery(8, 2);

    // NOTE(review): presumably this pause lets the appended file get a newer modification
    // time than the one seen during the earlier scan -- confirm against the scanner logic.
    Thread.sleep(1000);

    // Append two more lines to file13.txt.
    HashSet<String> appendedLines = Sets.newHashSet();
    int lineNumber = 2;
    while (lineNumber < 4) {
        appendedLines.add("f13" + "l" + lineNumber);
        lineNumber++;
    }
    FileUtils.writeLines(new File(testMeta.dataDirectory, "file13" + ".txt"), appendedLines, true);

    // Drop everything collected so far so only window 9 output is counted.
    testMeta.blockMetadataSink.clear();
    testMeta.fileMetadataSink.clear();

    testMeta.fileSplitterInput.beginWindow(9);
    // Block until the mock scanner releases a permit before emitting.
    MockScanner mockScanner = (MockScanner) testMeta.fileSplitterInput.getScanner();
    mockScanner.semaphore.acquire();
    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();

    Assert.assertEquals("Files " + testMeta.fileMetadataSink.collectedTuples, 1,
        testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Blocks", 6, testMeta.blockMetadataSink.collectedTuples.size());

    testMeta.fileSplitterInput.teardown();
}
Also used : File(java.io.File) FSWindowDataManager(org.apache.apex.malhar.lib.wal.FSWindowDataManager) Test(org.junit.Test)

Example 22 with FSWindowDataManager

use of org.apache.apex.malhar.lib.wal.FSWindowDataManager in project apex-malhar by apache.

the class FileSplitterInputTest method testRecoveryOfPartialFile.

/**
 * Verifies that a file whose blocks were only partially emitted before a failure is replayed
 * and then continued correctly once the operator is restored from a checkpointed copy.
 *
 * <p>Sequence: emit the first 2 blocks of a file in window 1, swap in the copy that was cloned
 * before setup, replay window 1, emit the remaining blocks in windows 2 and 3, then pick up a
 * different file in window 4.
 *
 * @throws InterruptedException if waiting on the scanner semaphore is interrupted
 */
@Test
public void testRecoveryOfPartialFile() throws InterruptedException {
    FSWindowDataManager fsIdempotentStorageManager = new FSWindowDataManager();
    testMeta.updateConfig(fsIdempotentStorageManager, 500L, 2L, 2);
    // Snapshot of the configured operator taken before setup; it stands in for the checkpoint.
    FileSplitterInput checkpointedInput = KryoCloneUtils.cloneObject(testMeta.fileSplitterInput);
    testMeta.fileSplitterInput.setup(testMeta.context);
    testMeta.fileSplitterInput.beginWindow(1);
    // Wait for the scanner to release the expected number of permits before emitting.
    testMeta.scanner.semaphore.acquire(12);
    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();
    // fileX.txt has just 6 blocks. Since blocks threshold is 2, only 2 are emitted.
    Assert.assertEquals("Files", 1, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());
    AbstractFileSplitter.FileMetadata fileX = testMeta.fileMetadataSink.collectedTuples.get(0);
    testMeta.fileMetadataSink.clear();
    testMeta.blockMetadataSink.clear();
    testMeta.fileSplitterInput.teardown();
    // there was a failure and the operator was re-deployed
    testMeta.fileSplitterInput = checkpointedInput;
    testMeta.resetSinks();
    testMeta.fileSplitterInput.setup(testMeta.context);
    testMeta.fileSplitterInput.beginWindow(1);
    // fileX is recovered and first two blocks are repeated.
    Assert.assertEquals("Recovered Files", 1, testMeta.fileMetadataSink.collectedTuples.size());
    AbstractFileSplitter.FileMetadata fileXRecovered = testMeta.fileMetadataSink.collectedTuples.get(0);
    Assert.assertEquals("recovered file-metadata", fileX.getFileName(), fileXRecovered.getFileName());
    Assert.assertEquals("Recovered Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());
    testMeta.fileSplitterInput.endWindow();
    testMeta.fileMetadataSink.clear();
    testMeta.blockMetadataSink.clear();
    testMeta.fileSplitterInput.beginWindow(2);
    // next 2 blocks of fileX
    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();
    testMeta.fileSplitterInput.beginWindow(3);
    // next 2 blocks of fileX
    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();
    // Windows 2 and 3 together: no new file metadata, and the remaining 4 blocks of fileX.
    Assert.assertEquals("File", 0, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Blocks", 4, testMeta.blockMetadataSink.collectedTuples.size());
    testMeta.fileMetadataSink.clear();
    testMeta.blockMetadataSink.clear();
    testMeta.fileSplitterInput.beginWindow(4);
    ((MockScanner) testMeta.fileSplitterInput.getScanner()).semaphore.acquire(11);
    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();
    // 2 blocks of a different file
    Assert.assertEquals("New file", 1, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());
    AbstractFileSplitter.FileMetadata fileY = testMeta.fileMetadataSink.collectedTuples.get(0);
    for (BlockMetadata.FileBlockMetadata blockMetadata : testMeta.blockMetadataSink.collectedTuples) {
        Assert.assertTrue("Block file name", blockMetadata.getFilePath().endsWith(fileY.getFileName()));
    }
    // Tear down exactly once, after every block has been checked. Previously this call sat
    // inside the loop, so it ran once per block and the remaining iterations executed against
    // an operator that had already been torn down.
    testMeta.fileSplitterInput.teardown();
}
Also used : BlockMetadata(org.apache.apex.malhar.lib.io.block.BlockMetadata) FSWindowDataManager(org.apache.apex.malhar.lib.wal.FSWindowDataManager) Test(org.junit.Test)

Example 23 with FSWindowDataManager

use of org.apache.apex.malhar.lib.wal.FSWindowDataManager in project apex-malhar by apache.

the class FileSplitterInputTest method testRecoveryOfBlockMetadataIterator.

/**
 * Checks that, after the operator is torn down part-way through a file and then cloned and
 * set up again, replaying window 1 re-emits the same file metadata and the same 2 blocks.
 *
 * @throws InterruptedException if waiting on the scanner semaphore is interrupted
 */
@Test
public void testRecoveryOfBlockMetadataIterator() throws InterruptedException {
    testMeta.updateConfig(new FSWindowDataManager(), 500L, 2L, 2);
    testMeta.fileSplitterInput.setup(testMeta.context);

    // Window 1: wait for the scanner, then emit up to the blocks threshold.
    testMeta.fileSplitterInput.beginWindow(1);
    testMeta.scanner.semaphore.acquire(12);
    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();

    // The threshold of 2 caps the emission at 2 blocks although the file has more
    // (5 blocks for file0.txt, according to the original author's note).
    Assert.assertEquals("Files", 1, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());
    testMeta.blockMetadataSink.clear();
    testMeta.fileMetadataSink.clear();

    // Simulated checkpoint followed by a failure.
    testMeta.fileSplitterInput.teardown();

    // Simulated restore: a Kryo clone replaces the operator and the sinks are attached again.
    testMeta.fileSplitterInput = KryoCloneUtils.cloneObject(testMeta.fileSplitterInput);
    TestUtils.setSink(testMeta.fileSplitterInput.filesMetadataOutput, testMeta.fileMetadataSink);
    TestUtils.setSink(testMeta.fileSplitterInput.blocksMetadataOutput, testMeta.blockMetadataSink);
    testMeta.fileSplitterInput.setup(testMeta.context);

    // Replaying window 1 must reproduce what was emitted before the failure.
    testMeta.fileSplitterInput.beginWindow(1);
    Assert.assertEquals("Recovered Files", 1, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Recovered Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());

    testMeta.fileSplitterInput.teardown();
}
Also used : FSWindowDataManager(org.apache.apex.malhar.lib.wal.FSWindowDataManager) Test(org.junit.Test)

Example 24 with FSWindowDataManager

use of org.apache.apex.malhar.lib.wal.FSWindowDataManager in project apex-malhar by apache.

the class FileSplitterTest method testIdempotency.

/**
 * Checks that window 1 is replayed after a second {@code setup}: all 12 blocks are
 * re-emitted on {@code beginWindow(1)} and each one points at a known file path.
 *
 * @throws InterruptedException declared by the original signature; presumably propagated
 *     from {@code testFileMetadata()}
 */
@Test
public void testIdempotency() throws InterruptedException {
    testMeta.fileSplitter.setWindowDataManager(new FSWindowDataManager());
    testMeta.fileSplitter.setup(testMeta.context);

    // First pass: the sibling test emits window 1 from the data directory.
    testFileMetadata();
    testMeta.blockMetadataSink.clear();
    testMeta.fileMetadataSink.clear();

    // Second setup on the same operator, then replay of window 1.
    testMeta.fileSplitter.setup(testMeta.context);
    testMeta.fileSplitter.beginWindow(1);

    Assert.assertEquals("Blocks", 12, testMeta.blockMetadataSink.collectedTuples.size());
    for (Object tuple : testMeta.blockMetadataSink.collectedTuples) {
        String filePath = ((BlockMetadata.FileBlockMetadata) tuple).getFilePath();
        Assert.assertTrue("path: " + filePath, testMeta.filePaths.contains(filePath));
    }
}
Also used : BlockMetadata(org.apache.apex.malhar.lib.io.block.BlockMetadata) FSWindowDataManager(org.apache.apex.malhar.lib.wal.FSWindowDataManager) Test(org.junit.Test)

Example 25 with FSWindowDataManager

use of org.apache.apex.malhar.lib.wal.FSWindowDataManager in project apex-malhar by apache.

the class FileSplitterTest method testRecoveryOfPartialFile.

/**
 * Disabled scenario: recovery of a file whose blocks were only partially emitted before a
 * failure, followed by the hand-over to the next file.
 *
 * <p>NOTE(review): the method carries {@code @Ignore} but no {@code @Test}; confirm whether
 * {@code @Test} was dropped on purpose.
 *
 * @throws InterruptedException if an exchanger hand-off is interrupted
 */
@Ignore
public void testRecoveryOfPartialFile() throws InterruptedException {
    FSWindowDataManager windowDataManager = new FSWindowDataManager();
    String statePath = testMeta.dataDirectory + '/' + "recovery";
    windowDataManager.setStatePath(statePath);
    testMeta.fileSplitter.setWindowDataManager(windowDataManager);
    testMeta.fileSplitter.setBlockSize(2L);
    testMeta.fileSplitter.setBlocksThreshold(2);
    testMeta.fileSplitter.scanner.setScanIntervalMillis(500);
    testMeta.fileSplitter.setup(testMeta.context);

    // Window 1: hand off with the exchanger, then emit up to the blocks threshold.
    testMeta.fileSplitter.beginWindow(1);
    testMeta.exchanger.exchange(null);
    testMeta.fileSplitter.emitTuples();
    testMeta.fileSplitter.endWindow();

    // The threshold of 2 caps the emission at 2 blocks although the file has more
    // (5 blocks for file0.txt, according to the original author's note).
    Assert.assertEquals("Files", 1, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());
    testMeta.blockMetadataSink.clear();
    testMeta.fileMetadataSink.clear();

    // Simulated failure and re-deployment: set up again and replay window 1.
    testMeta.fileSplitter.setup(testMeta.context);
    testMeta.fileSplitter.beginWindow(1);
    Assert.assertEquals("Recovered Files", 1, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Recovered Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());

    // NOTE(review): the replayed window 1 is not closed with endWindow() before window 2
    // begins -- confirm this is intended.
    testMeta.fileSplitter.beginWindow(2);
    testMeta.fileSplitter.emitTuples();
    testMeta.fileSplitter.endWindow();

    // The sinks were not cleared after the replay, so this total includes the 2 replayed
    // blocks, and the file name read below is that of the recovered file.
    Assert.assertEquals("Blocks", 4, testMeta.blockMetadataSink.collectedTuples.size());
    String firstFileName = testMeta.fileMetadataSink.collectedTuples.get(0).getFileName();
    testMeta.blockMetadataSink.clear();
    testMeta.fileMetadataSink.clear();

    // Window 3: expected to emit one block of the first file and one block of a new file.
    testMeta.fileSplitter.beginWindow(3);
    testMeta.exchanger.exchange(null);
    testMeta.fileSplitter.emitTuples();
    testMeta.fileSplitter.endWindow();

    Assert.assertEquals("New file", 1, testMeta.fileMetadataSink.collectedTuples.size());
    Assert.assertEquals("Blocks", 2, testMeta.blockMetadataSink.collectedTuples.size());
    String secondFileName = testMeta.fileMetadataSink.collectedTuples.get(0).getFileName();

    String firstBlockPath = testMeta.blockMetadataSink.collectedTuples.get(0).getFilePath();
    Assert.assertTrue("Block file name 0", firstBlockPath.endsWith(firstFileName));
    String secondBlockPath = testMeta.blockMetadataSink.collectedTuples.get(1).getFilePath();
    Assert.assertTrue("Block file name 1", secondBlockPath.endsWith(secondFileName));
}
Also used : FSWindowDataManager(org.apache.apex.malhar.lib.wal.FSWindowDataManager) Ignore(org.junit.Ignore)

Aggregations

FSWindowDataManager (org.apache.apex.malhar.lib.wal.FSWindowDataManager): 28; Test (org.junit.Test): 14; File (java.io.File): 9; CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink): 6; Path (org.apache.hadoop.fs.Path): 6; Attribute (com.datatorrent.api.Attribute): 5; LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator): 5; DAG (com.datatorrent.api.DAG): 4; LocalMode (com.datatorrent.api.LocalMode): 4; BlockMetadata (org.apache.apex.malhar.lib.io.block.BlockMetadata): 3; OperatorContext (com.datatorrent.api.Context.OperatorContext): 2; IOException (java.io.IOException): 2; CountDownLatch (java.util.concurrent.CountDownLatch): 2; MockSiteToSiteClient (org.apache.apex.malhar.contrib.nifi.mock.MockSiteToSiteClient): 2; OperatorContextTestHelper.mockOperatorContext (org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext): 2; ConsoleOutputOperator (org.apache.apex.malhar.lib.io.ConsoleOutputOperator): 2; Before (org.junit.Before): 2; DefaultPartition (com.datatorrent.api.DefaultPartition): 1; Partitioner (com.datatorrent.api.Partitioner): 1; Partition (com.datatorrent.api.Partitioner.Partition): 1