Search in sources :

Example 91 with CollectorTestSink

Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.

The class AbstractFileInputOperatorTest defines the method testStateWithIdempotency.

@Test
public void testStateWithIdempotency() throws Exception {
    // Start from a clean test directory.
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    // Create three files with two lines each, of the form "f<file>l<line>".
    for (int file = 0; file < 3; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    // Persist per-window state so the second run can replay idempotently.
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");
    oper.setWindowDataManager(manager);
    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" }) CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);
    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");
    oper.setup(testMeta.context);
    // First run: process all files while the window data manager records state.
    for (long wid = 0; wid < 4; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();
    sink.clear();
    // Idempotency part: seed recovery collections as if these files were still
    // outstanding, then replay the same windows; replay must clear the entries.
    String file0 = new File(testMeta.dir, "file0").getAbsolutePath();
    String file1 = new File(testMeta.dir, "file1").getAbsolutePath();
    String file2 = new File(testMeta.dir, "file2").getAbsolutePath();
    oper.pendingFiles.add(file0);
    oper.failedFiles.add(new AbstractFileInputOperator.FailedFile(file1, 0));
    oper.unfinishedFiles.add(new AbstractFileInputOperator.FailedFile(file2, 0));
    oper.setup(testMeta.context);
    for (long wid = 0; wid < 4; wid++) {
        oper.beginWindow(wid);
        oper.endWindow();
    }
    // BUGFIX: the original assertions compared against bare names ("file0")
    // while these collections hold absolute paths (see the seeding above),
    // so the negated checks could never fail. Compare against the absolute
    // paths that were actually added.
    Assert.assertFalse("pending state", oper.pendingFiles.contains(file0));
    for (AbstractFileInputOperator.FailedFile failedFile : oper.failedFiles) {
        Assert.assertFalse("failed state", failedFile.path.equals(file1));
    }
    for (AbstractFileInputOperator.FailedFile unfinishedFile : oper.unfinishedFiles) {
        Assert.assertFalse("unfinished state", unfinishedFile.path.equals(file2));
    }
    oper.teardown();
}
Also used : Path(org.apache.hadoop.fs.Path) FSWindowDataManager(org.apache.apex.malhar.lib.wal.FSWindowDataManager) LineByLineFileInputOperator(org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator) File(java.io.File) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 92 with CollectorTestSink

Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.

The class AbstractFileInputOperatorTest defines the method checkSubDir.

private void checkSubDir(boolean recursive) throws Exception {
    // Wipe the test directory so each invocation starts clean.
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    HashSet<String> expectedLines = Sets.newHashSet();
    String relativeDir = "";
    // Lay out two files at increasing nesting depth:
    // /depth_0/file0 and /depth_0/depth_1/file1, each with two "f<n>l<m>" lines.
    for (int fileNo = 0; fileNo < 2; fileNo++) {
        relativeDir += String.format("/depth_%d", fileNo);
        HashSet<String> fileLines = Sets.newHashSet();
        for (int lineNo = 0; lineNo < 2; lineNo++) {
            fileLines.add("f" + fileNo + "l" + lineNo);
        }
        expectedLines.addAll(fileLines);
        FileUtils.write(new File(testMeta.dir + relativeDir, "file" + fileNo), StringUtils.join(fileLines, '\n'));
    }
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" }) CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);
    oper.setDirectory(testMeta.dir);
    // Match file0/file1 anywhere except under a "target" path component.
    oper.getScanner().setFilePatternRegexp("((?!target).)*file[\\d]");
    oper.getScanner().setRecursive(recursive);
    oper.setup(testMeta.context);
    for (long windowId = 0; windowId < 3; windowId++) {
        oper.beginWindow(windowId);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();
    // Without recursion the scanner never descends into the subdirectories,
    // so no tuples at all are expected.
    int expectedNumTuples = recursive ? 4 : 0;
    if (!recursive) {
        expectedLines = new HashSet<String>();
    }
    Assert.assertEquals("number tuples", expectedNumTuples, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", expectedLines, new HashSet<String>(queryResults.collectedTuples));
}
Also used : Path(org.apache.hadoop.fs.Path) LineByLineFileInputOperator(org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator) File(java.io.File) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink)

Example 93 with CollectorTestSink

Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.

The class DeduperBoundedPOJOImplTest defines the method testDedup.

@Test
public void testDedup() {
    com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap attributes = new com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap();
    attributes.put(DAG.APPLICATION_ID, APP_ID);
    attributes.put(DAG.APPLICATION_PATH, applicationPath);
    attributes.put(DAG.InputPortMeta.TUPLE_CLASS, TestPojo.class);
    OperatorContext context = mockOperatorContext(OPERATOR_ID, attributes);
    deduper.setup(context);
    deduper.input.setup(new PortContext(attributes, context));
    deduper.activate(context);
    // Attach collectors to all three output ports.
    CollectorTestSink<TestPojo> uniqueSink = new CollectorTestSink<TestPojo>();
    TestUtils.setSink(deduper.unique, uniqueSink);
    CollectorTestSink<TestPojo> duplicateSink = new CollectorTestSink<TestPojo>();
    TestUtils.setSink(deduper.duplicate, duplicateSink);
    CollectorTestSink<TestPojo> expiredSink = new CollectorTestSink<TestPojo>();
    TestUtils.setSink(deduper.expired, expiredSink);
    deduper.beginWindow(0);
    Random r = new Random();
    int k = 1;
    // Feed 1000 tuples with distinct keys 1..1000; after every 10th, inject one
    // whose key repeats an already-seen key (r.nextInt(i) is in [0, i-1]; 0 is
    // remapped to 1, which always exists) — exactly 100 duplicates in total.
    for (int i = 1; i <= 1000; i++) {
        TestPojo pojo = new TestPojo(i, new Date(), k++);
        deduper.input.process(pojo);
        if (i % 10 == 0) {
            int dupId = r.nextInt(i);
            TestPojo pojoDuplicate = new TestPojo(dupId == 0 ? 1 : dupId, new Date(), k++);
            deduper.input.process(pojoDuplicate);
        }
    }
    deduper.handleIdleTime();
    deduper.endWindow();
    // FIX: assertEquals instead of assertTrue(x == n) so a failure reports
    // the actual counts rather than just "false".
    Assert.assertEquals("unique tuples", 1000, uniqueSink.collectedTuples.size());
    Assert.assertEquals("duplicate tuples", 100, duplicateSink.collectedTuples.size());
    deduper.teardown();
}
Also used : Date(java.util.Date) PortContext(com.datatorrent.stram.engine.PortContext) Random(java.util.Random) OperatorContextTestHelper.mockOperatorContext(org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext) OperatorContext(com.datatorrent.api.Context.OperatorContext) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 94 with CollectorTestSink

Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.

The class BottomNUnifierTest defines the method testUnifier.

@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testUnifier() {
    // Instantiate a unifier that keeps the bottom 2 values per key.
    BottomNUnifier<String, Integer> oper = new BottomNUnifier<>();
    oper.setN(2);
    CollectorTestSink sink = new CollectorTestSink();
    oper.mergedport.setSink(sink);
    oper.beginWindow(1);
    // Partial result from one partition: {5, 2}.
    ArrayList<Integer> values = new ArrayList<Integer>();
    values.add(5);
    values.add(2);
    HashMap<String, ArrayList<Integer>> tuple = new HashMap<String, ArrayList<Integer>>();
    tuple.put("a", values);
    oper.process(tuple);
    // Partial result from another partition: {3, 5}.
    values = new ArrayList<Integer>();
    values.add(3);
    values.add(5);
    tuple = new HashMap<String, ArrayList<Integer>>();
    tuple.put("a", values);
    oper.process(tuple);
    oper.endWindow();
    // BUGFIX: the original passed (message, actual, expected) — JUnit's
    // signature is assertEquals(message, expected, actual), so a failure
    // would have reported the values backwards.
    Assert.assertEquals("Tuples in sink", 1, sink.collectedTuples.size());
    tuple = (HashMap<String, ArrayList<Integer>>) sink.collectedTuples.get(0);
    values = tuple.get("a");
    // The merged bottom-2 of {5, 2} and {3, 5} is {2, 3}.
    Assert.assertEquals(2, values.size());
    Assert.assertTrue(values.contains(2));
    Assert.assertTrue(values.contains(3));
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 95 with CollectorTestSink

Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.

The class FilterValuesTest defines the method testNodeProcessing.

/**
 * Verifies that FilterValues emits exactly the configured values, and emits
 * the complement once inverse mode is switched on.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testNodeProcessing() throws Exception {
    FilterValues<Integer> oper = new FilterValues<Integer>();
    CollectorTestSink filterSink = new CollectorTestSink();
    oper.filter.setSink(filterSink);
    // Configure the pass-through set: clearValues() wipes the initial 5,
    // after which the filter keeps {4, 200, 2}.
    oper.setValue(5);
    oper.clearValues();
    oper.setValue(4);
    oper.setValues(new Integer[] { 200, 2 });
    oper.beginWindow(0);
    // Batch 1: 2, 200 and the trailing 2 pass -> 3 tuples totalling 204.
    for (int v : new int[] { 2, 5, 7, 42, 200, 2 }) {
        oper.data.process(v);
    }
    Assert.assertEquals("number emitted tuples", 3, filterSink.collectedTuples.size());
    Assert.assertEquals("Total filtered value is ", 204, getTotal(filterSink.collectedTuples));
    filterSink.clear();
    // Batch 2: 5 was removed from the filter set, so nothing passes.
    oper.data.process(5);
    Assert.assertEquals("number emitted tuples", 0, filterSink.collectedTuples.size());
    Assert.assertEquals("Total filtered value is ", 0, getTotal(filterSink.collectedTuples));
    filterSink.clear();
    // Batch 3: only the two 2s pass -> total 4.
    for (int v : new int[] { 2, 33, 2 }) {
        oper.data.process(v);
    }
    Assert.assertEquals("number emitted tuples", 2, filterSink.collectedTuples.size());
    Assert.assertEquals("Total filtered value is ", 4, getTotal(filterSink.collectedTuples));
    filterSink.clear();
    // Batch 4: alternating 6 and 2 — only the four 2s pass -> total 8.
    for (int v : new int[] { 6, 2, 6, 2, 6, 2, 6, 2 }) {
        oper.data.process(v);
    }
    Assert.assertEquals("number emitted tuples", 4, filterSink.collectedTuples.size());
    Assert.assertEquals("Total filtered value is ", 8, getTotal(filterSink.collectedTuples));
    filterSink.clear();
    // Inverse mode: values NOT in the set now pass, so 9 is emitted.
    oper.setInverse(true);
    oper.data.process(9);
    Assert.assertEquals("number emitted tuples", 1, filterSink.collectedTuples.size());
    Assert.assertEquals("Total filtered value is ", 9, getTotal(filterSink.collectedTuples));
    oper.endWindow();
}
Also used : CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Aggregations

CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink)162 Test (org.junit.Test)133 HashMap (java.util.HashMap)56 Map (java.util.Map)33 File (java.io.File)21 ArrayList (java.util.ArrayList)21 OperatorContext (com.datatorrent.api.Context.OperatorContext)19 OperatorContextTestHelper.mockOperatorContext (org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext)18 KeyValPair (org.apache.apex.malhar.lib.util.KeyValPair)15 Path (org.apache.hadoop.fs.Path)15 Attribute (com.datatorrent.api.Attribute)14 ColumnIndex (org.apache.apex.malhar.lib.streamquery.index.ColumnIndex)13 LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator)12 Kryo (com.esotericsoftware.kryo.Kryo)10 Date (java.util.Date)8 TestPortContext (org.apache.apex.malhar.lib.helper.TestPortContext)8 PortContext (com.datatorrent.stram.engine.PortContext)7 List (java.util.List)6 CountDownLatch (java.util.concurrent.CountDownLatch)6 SelectOperator (org.apache.apex.malhar.contrib.misc.streamquery.SelectOperator)6