Search in sources:

Example 86 with CollectorTestSink

use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in project apex-malhar by apache.

The method testRecoveryWithCurrentFile of the class AbstractFileInputOperatorTest.

/**
 * Restores an operator whose state already points at a current file with
 * {@code offset = 1} and checks what it emits in a single window.
 *
 * <p>A five-line file is written, the operator is configured directly through its
 * {@code scanner}, {@code currentFile} and {@code offset} fields, and one window is run.
 * The test expects four tuples: every line of the file except the first one.
 *
 * @throws Exception if the test directory cannot be cleaned or the fixture file cannot be written
 */
@Test
public void testRecoveryWithCurrentFile() throws Exception {
    // Start from a clean test directory.
    Path testDirPath = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(testDirPath, true);

    HashSet<String> fileLines = new HashSet<String>();
    int lineNo = 0;
    while (lineNo < 5) {
        fileLines.add("f0" + "l" + lineNo);
        lineNo++;
    }
    // Snapshot the set's iteration order; the file below is written from the same set,
    // so the expected list and the file content share one ordering.
    List<String> expectedLines = new ArrayList<String>(fileLines);

    File currentFile = new File(testMeta.dir, "file0");
    FileUtils.write(currentFile, StringUtils.join(fileLines, '\n'));

    // Put the operator into the "recovered" state by hand.
    LineByLineFileInputOperator operator = new LineByLineFileInputOperator();
    operator.scanner = null;
    operator.currentFile = currentFile.getAbsolutePath();
    operator.offset = 1;

    CollectorTestSink<String> collected = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> rawSink = (CollectorTestSink) collected;
    operator.output.setSink(rawSink);
    operator.setDirectory(testMeta.dir);

    // Run exactly one window through the full operator lifecycle.
    operator.setup(testMeta.context);
    operator.beginWindow(0);
    operator.emitTuples();
    operator.endWindow();
    operator.teardown();

    Assert.assertEquals("number tuples", 4, collected.collectedTuples.size());
    List<String> expectedTail = expectedLines.subList(1, expectedLines.size());
    List<String> received = new ArrayList<String>(collected.collectedTuples);
    Assert.assertEquals("lines", expectedTail, received);
}
Also used: Path (org.apache.hadoop.fs.Path), LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator), File (java.io.File), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)

Example 87 with CollectorTestSink

use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in project apex-malhar by apache.

The method testIdempotency of the class AbstractFileInputOperatorTest.

/**
 * Checks that re-running already-processed windows after a restart yields the same tuples.
 *
 * <p>Two two-line files are written and the operator runs windows 0..2 with an
 * {@link FSWindowDataManager} whose state path is {@code testMeta.dir + "/recovery"}.
 * The operator is then set up again and the same window ids are replayed
 * <em>without</em> calling {@code emitTuples()}; the test expects the sink to receive
 * exactly the tuples collected during the first run.
 *
 * @throws Exception if the test directory cannot be cleaned or a fixture file cannot be written
 */
@Test
public void testIdempotency() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    for (int file = 0; file < 2; file++) {
        List<String> lines = Lists.newArrayList();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");
    oper.setWindowDataManager(manager);
    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    TestUtils.setSink(oper.output, queryResults);
    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");
    oper.setup(testMeta.context);
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();
    List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);
    queryResults.clear();
    // Idempotency part: replay the same window ids without emitTuples().
    oper.setup(testMeta.context);
    try {
        for (long wid = 0; wid < 3; wid++) {
            oper.beginWindow(wid);
            oper.endWindow();
        }
        // Expected count is tied to the lines actually written rather than a magic number.
        Assert.assertEquals("number tuples", allLines.size(), queryResults.collectedTuples.size());
        Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
    } finally {
        // Tear down even when an assertion fails so the operator's resources are released.
        oper.teardown();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator), File (java.io.File), FSWindowDataManager (org.apache.apex.malhar.lib.wal.FSWindowDataManager), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)

Example 88 with CollectorTestSink

use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in project apex-malhar by apache.

The method testRecoveryWithPendingFile of the class AbstractFileInputOperatorTest.

/**
 * Restores an operator whose {@code pendingFiles} already contains a file and checks
 * that one window emits the whole file.
 *
 * <p>A five-line file is written and its absolute path is added to the operator's
 * {@code pendingFiles} with {@code scanner} set to {@code null}. After a single window
 * the test expects five tuples, equal to the lines in the order they were written.
 *
 * @throws Exception if the test directory cannot be cleaned or the fixture file cannot be written
 */
@Test
public void testRecoveryWithPendingFile() throws Exception {
    // Start from a clean test directory.
    Path testDirPath = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(testDirPath, true);

    HashSet<String> fileLines = new HashSet<String>();
    int lineNo = 0;
    while (lineNo < 5) {
        fileLines.add("f0" + "l" + lineNo);
        lineNo++;
    }
    // Snapshot the set's iteration order; the file below is written from the same set,
    // so the expected list and the file content share one ordering.
    List<String> expectedLines = new ArrayList<String>(fileLines);

    File pendingFile = new File(testMeta.dir, "file0");
    FileUtils.write(pendingFile, StringUtils.join(fileLines, '\n'));

    // Put the operator into the "recovered" state by hand.
    LineByLineFileInputOperator operator = new LineByLineFileInputOperator();
    operator.scanner = null;
    operator.pendingFiles.add(pendingFile.getAbsolutePath());

    CollectorTestSink<String> collected = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> rawSink = (CollectorTestSink) collected;
    operator.output.setSink(rawSink);
    operator.setDirectory(testMeta.dir);

    // Run exactly one window through the full operator lifecycle.
    operator.setup(testMeta.context);
    operator.beginWindow(0);
    operator.emitTuples();
    operator.endWindow();
    operator.teardown();

    Assert.assertEquals("number tuples", 5, collected.collectedTuples.size());
    List<String> received = new ArrayList<String>(collected.collectedTuples);
    Assert.assertEquals("lines", expectedLines, received);
}
Also used: Path (org.apache.hadoop.fs.Path), LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator), File (java.io.File), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)

Example 89 with CollectorTestSink

use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in project apex-malhar by apache.

The method testEmptyDirectory of the class AbstractFileInputOperatorTest.

/**
 * Checks which directories the operator records when the tree contains empty directories.
 *
 * <p>The test builds {@code a}, {@code b}, {@code b/c} (all empty) and {@code d}
 * (holding one five-line file) under {@code testMeta.dir}, runs three windows with a
 * scan interval of 0 ms, and expects {@code oper.dirPaths} to equal the canonical paths
 * of the root plus those four directories.
 *
 * @throws Exception if the test directory cannot be cleaned or the fixture tree cannot be created
 */
@Test
public void testEmptyDirectory() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    Set<String> dPaths = Sets.newHashSet();
    dPaths.add(new File(testMeta.dir).getCanonicalPath());
    String subdir01 = "/a";
    dPaths.add(new File(testMeta.dir + subdir01).getCanonicalPath());
    FileUtils.forceMkdir((new File(testMeta.dir + subdir01)));
    String subdir02 = "/b";
    dPaths.add(new File(testMeta.dir + subdir02).getCanonicalPath());
    FileUtils.forceMkdir(new File(testMeta.dir + subdir02));
    String subdir03 = subdir02 + "/c";
    dPaths.add(new File(testMeta.dir + subdir03).getCanonicalPath());
    FileUtils.forceMkdir(new File(testMeta.dir + subdir03));
    String subdir04 = "/d";
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 5; line++) {
        lines.add("f0" + "l" + line);
    }
    // The only non-empty directory: "d" is never created explicitly; it appears when the file is written.
    File testFile = new File(testMeta.dir + subdir04, "file0");
    dPaths.add(new File(testMeta.dir + subdir04).getCanonicalPath());
    FileUtils.write(testFile, StringUtils.join(lines, '\n'));
    LineOperator oper = new LineOperator();
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);
    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" }) CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);
    // Run a few windows so the operator reads all records
    // (original note: this populates processedList in the operator).
    oper.setup(testMeta.context);
    try {
        for (int wid = 0; wid < 3; wid++) {
            oper.beginWindow(wid);
            oper.emitTuples();
            oper.endWindow();
        }
        Assert.assertEquals("Size", 5, oper.dirPaths.size());
        // assertEquals reports both sets on failure, unlike assertTrue(a.equals(b)).
        Assert.assertEquals("Checking Sets", dPaths, oper.dirPaths);
    } finally {
        // The original never tore the operator down; release its resources in every case.
        oper.teardown();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), File (java.io.File), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)

Example 90 with CollectorTestSink

use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in project apex-malhar by apache.

The method testIdempotencyWithMultipleEmitTuples of the class AbstractFileInputOperatorTest.

/**
 * Checks that replaying a window reproduces its tuples when the first run called
 * {@code emitTuples()} several times inside that one window.
 *
 * <p>Two two-line files are written and window 0 is run with three {@code emitTuples()}
 * calls, using an {@link FSWindowDataManager} whose state path is
 * {@code testMeta.dir + "/recovery"}. The operator is then set up again and window 0 is
 * replayed <em>without</em> {@code emitTuples()}; the test expects the sink to receive
 * exactly the tuples collected during the first run.
 *
 * @throws Exception if the test directory cannot be cleaned or a fixture file cannot be written
 */
@Test
public void testIdempotencyWithMultipleEmitTuples() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    for (int file = 0; file < 2; file++) {
        List<String> lines = Lists.newArrayList();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");
    oper.setWindowDataManager(manager);
    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    TestUtils.setSink(oper.output, queryResults);
    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");
    oper.setup(testMeta.context);
    oper.beginWindow(0);
    for (int i = 0; i < 3; i++) {
        oper.emitTuples();
    }
    oper.endWindow();
    oper.teardown();
    List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);
    queryResults.clear();
    // Idempotency part: replay window 0 without emitTuples().
    oper.setup(testMeta.context);
    try {
        oper.beginWindow(0);
        oper.endWindow();
        // Expected count is tied to the lines actually written rather than a magic number.
        Assert.assertEquals("number tuples", allLines.size(), queryResults.collectedTuples.size());
        Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
    } finally {
        // Tear down even when an assertion fails so the operator's resources are released.
        oper.teardown();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator), File (java.io.File), FSWindowDataManager (org.apache.apex.malhar.lib.wal.FSWindowDataManager), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)

Aggregations

CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink): 162; Test (org.junit.Test): 133; HashMap (java.util.HashMap): 56; Map (java.util.Map): 33; File (java.io.File): 21; ArrayList (java.util.ArrayList): 21; OperatorContext (com.datatorrent.api.Context.OperatorContext): 19; OperatorContextTestHelper.mockOperatorContext (org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext): 18; KeyValPair (org.apache.apex.malhar.lib.util.KeyValPair): 15; Path (org.apache.hadoop.fs.Path): 15; Attribute (com.datatorrent.api.Attribute): 14; ColumnIndex (org.apache.apex.malhar.lib.streamquery.index.ColumnIndex): 13; LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator): 12; Kryo (com.esotericsoftware.kryo.Kryo): 10; Date (java.util.Date): 8; TestPortContext (org.apache.apex.malhar.lib.helper.TestPortContext): 8; PortContext (com.datatorrent.stram.engine.PortContext): 7; List (java.util.List): 6; CountDownLatch (java.util.concurrent.CountDownLatch): 6; SelectOperator (org.apache.apex.malhar.contrib.misc.streamquery.SelectOperator): 6