Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
From the class AbstractFileInputOperatorTest, method testStateWithIdempotency.
/**
 * Runs the operator over three two-line files for windows 0-3 with an
 * {@link FSWindowDataManager} attached, tears it down, seeds the pending, failed and
 * unfinished collections by hand, runs windows 0-3 again without emitting, and finally
 * checks the contents of those three collections.
 */
@Test
public void testStateWithIdempotency() throws Exception {
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // Create file0..file2, each holding the lines "f<file>l0" and "f<file>l1".
  for (int file = 0; file < 3; file++) {
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 2; line++) {
      lines.add("f" + file + "l" + line);
    }
    FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
  }

  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  // Window state is kept under <test dir>/recovery.
  FSWindowDataManager manager = new FSWindowDataManager();
  manager.setStatePath(testMeta.dir + "/recovery");
  oper.setWindowDataManager(manager);

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  oper.setDirectory(testMeta.dir);
  oper.getScanner().setFilePatternRegexp(".*file[\\d]");

  // First pass: regular processing of windows 0-3.
  oper.setup(testMeta.context);
  for (long wid = 0; wid < 4; wid++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
  }
  oper.teardown();
  sink.clear();

  // idempotency part: seed the operator state, then run the same window ids again
  // (presumably replayed from the state saved during the first pass; no emitTuples() here).
  oper.pendingFiles.add(new File(testMeta.dir, "file0").getAbsolutePath());
  oper.failedFiles.add(new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file1").getAbsolutePath(), 0));
  oper.unfinishedFiles.add(new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file2").getAbsolutePath(), 0));
  oper.setup(testMeta.context);
  for (long wid = 0; wid < 4; wid++) {
    oper.beginWindow(wid);
    oper.endWindow();
  }

  // NOTE(review): the entries seeded above are absolute paths, while the checks below compare
  // against bare file names, so they cannot detect a seeded entry that was left behind.
  // Confirm whether a full-path comparison was intended before tightening them.
  Assert.assertFalse("pending state", oper.pendingFiles.contains("file0"));
  for (AbstractFileInputOperator.FailedFile failedFile : oper.failedFiles) {
    Assert.assertFalse("failed state", failedFile.path.equals("file1"));
  }
  for (AbstractFileInputOperator.FailedFile unfinishedFile : oper.unfinishedFiles) {
    Assert.assertFalse("unfinished state", unfinishedFile.path.equals("file2"));
  }
  oper.teardown();
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
From the class AbstractFileInputOperatorTest, method checkSubDir.
/**
 * Writes file0 into {@code /depth_0} and file1 into {@code /depth_0/depth_1} below the test
 * directory (two lines each), runs the operator for three windows and compares the collected
 * tuples with the expectation for the given scanner mode.
 *
 * @param recursive value passed to the scanner's {@code setRecursive}; when {@code false}
 *                  the test expects no tuples at all, otherwise all four written lines
 */
private void checkSubDir(boolean recursive) throws Exception {
  // Start from an empty test directory.
  Path testRoot = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(testRoot, true);

  HashSet<String> writtenLines = Sets.newHashSet();
  String nestedDir = "";
  for (int fileIdx = 0; fileIdx < 2; fileIdx++) {
    // Each file goes one directory level deeper than the previous one.
    nestedDir = nestedDir + String.format("/depth_%d", fileIdx);
    HashSet<String> fileLines = Sets.newHashSet();
    for (int lineIdx = 0; lineIdx < 2; lineIdx++) {
      fileLines.add("f" + fileIdx + "l" + lineIdx);
    }
    writtenLines.addAll(fileLines);
    File target = new File(testMeta.dir + nestedDir, "file" + fileIdx);
    FileUtils.write(target, StringUtils.join(fileLines, '\n'));
  }

  LineByLineFileInputOperator operator = new LineByLineFileInputOperator();
  CollectorTestSink<String> collected = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> outputSink = (CollectorTestSink) collected;
  operator.output.setSink(outputSink);
  operator.setDirectory(testMeta.dir);
  operator.getScanner().setFilePatternRegexp("((?!target).)*file[\\d]");
  operator.getScanner().setRecursive(recursive);

  operator.setup(testMeta.context);
  long windowId = 0;
  while (windowId < 3) {
    operator.beginWindow(windowId);
    operator.emitTuples();
    operator.endWindow();
    windowId++;
  }
  operator.teardown();

  // Both files sit in sub-directories, so a non-recursive scan is expected to yield nothing.
  int expectedNumTuples = recursive ? 4 : 0;
  HashSet<String> expectedLines = recursive ? writtenLines : new HashSet<String>();

  Assert.assertEquals("number tuples", expectedNumTuples, collected.collectedTuples.size());
  Assert.assertEquals("lines", expectedLines, new HashSet<String>(collected.collectedTuples));
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
From the class DeduperBoundedPOJOImplTest, method testDedup.
/**
 * Sends 1000 tuples whose first constructor argument runs from 1 to 1000 and, after every
 * tenth one, an extra tuple whose first argument repeats a value from an earlier tuple. The test
 * expects 1000 tuples on the unique port and 100 on the duplicate port.
 */
@Test
public void testDedup() {
  com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap attributes = new com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap();
  attributes.put(DAG.APPLICATION_ID, APP_ID);
  attributes.put(DAG.APPLICATION_PATH, applicationPath);
  attributes.put(DAG.InputPortMeta.TUPLE_CLASS, TestPojo.class);
  OperatorContext context = mockOperatorContext(OPERATOR_ID, attributes);

  deduper.setup(context);
  deduper.input.setup(new PortContext(attributes, context));
  deduper.activate(context);

  CollectorTestSink<TestPojo> uniqueSink = new CollectorTestSink<TestPojo>();
  TestUtils.setSink(deduper.unique, uniqueSink);
  CollectorTestSink<TestPojo> duplicateSink = new CollectorTestSink<TestPojo>();
  TestUtils.setSink(deduper.duplicate, duplicateSink);
  // The expired port gets a sink as well, although nothing is asserted on it.
  CollectorTestSink<TestPojo> expiredSink = new CollectorTestSink<TestPojo>();
  TestUtils.setSink(deduper.expired, expiredSink);

  deduper.beginWindow(0);
  // Fixed seed: the repeated ids are the same on every run, so a failure can be reproduced.
  Random r = new Random(0);
  int k = 1;
  for (int i = 1; i <= 1000; i++) {
    TestPojo pojo = new TestPojo(i, new Date(), k++);
    deduper.input.process(pojo);
    if (i % 10 == 0) {
      // nextInt(i) is in [0, i - 1]; 0 is mapped to 1 so the id is always one sent before.
      int dupId = r.nextInt(i);
      TestPojo pojoDuplicate = new TestPojo(dupId == 0 ? 1 : dupId, new Date(), k++);
      deduper.input.process(pojoDuplicate);
    }
  }
  deduper.handleIdleTime();
  deduper.endWindow();

  // assertEquals reports the actual count when the expectation is not met.
  Assert.assertEquals("unique tuples", 1000, uniqueSink.collectedTuples.size());
  Assert.assertEquals("duplicate tuples", 100, duplicateSink.collectedTuples.size());
  deduper.teardown();
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
From the class BottomNUnifierTest method testUnifier.
/**
 * Feeds the unifier (N = 2) two maps for key "a", holding [5, 2] and [3, 5], within one
 * window and expects a single merged tuple whose list for "a" has two entries, 2 and 3.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testUnifier() {
  // Instantiate unifier
  BottomNUnifier<String, Integer> oper = new BottomNUnifier<>();
  oper.setN(2);
  CollectorTestSink sink = new CollectorTestSink();
  oper.mergedport.setSink(sink);

  oper.beginWindow(1);

  // First partial result for key "a".
  ArrayList<Integer> values = new ArrayList<Integer>();
  values.add(5);
  values.add(2);
  HashMap<String, ArrayList<Integer>> tuple = new HashMap<String, ArrayList<Integer>>();
  tuple.put("a", values);
  oper.process(tuple);

  // Second partial result for the same key.
  values = new ArrayList<Integer>();
  values.add(3);
  values.add(5);
  tuple = new HashMap<String, ArrayList<Integer>>();
  tuple.put("a", values);
  oper.process(tuple);

  oper.endWindow();

  // Expected value first, actual second, so a failure message reads correctly.
  Assert.assertEquals("Tuples in sink", 1, sink.collectedTuples.size());
  tuple = (HashMap<String, ArrayList<Integer>>) sink.collectedTuples.get(0);
  values = tuple.get("a");
  Assert.assertEquals(2, values.size());
  Assert.assertTrue("merged list contains 2", values.contains(2));
  Assert.assertTrue("merged list contains 3", values.contains(3));
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
From the class FilterValuesTest, method testNodeProcessing.
/**
 * Checks the tuples emitted on the filter port for several input batches: first with the
 * operator configured with the values 4, 200 and 2, then once more after switching the
 * operator to inverse mode.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testNodeProcessing() throws Exception {
  FilterValues<Integer> filterOp = new FilterValues<Integer>();
  CollectorTestSink collector = new CollectorTestSink();
  filterOp.filter.setSink(collector);

  // Set a value and clear it again, then configure 4 (single value) plus 200 and 2 (array).
  filterOp.setValue(5);
  filterOp.clearValues();
  filterOp.setValue(4);
  filterOp.setValues(new Integer[] {200, 2});

  filterOp.beginWindow(0);

  // Batch 1: expected to let 2, 200 and 2 through.
  for (int input : new int[] {2, 5, 7, 42, 200, 2}) {
    filterOp.data.process(input);
  }
  Assert.assertEquals("number emitted tuples", 3, collector.collectedTuples.size());
  Assert.assertEquals("Total filtered value is ", 204, getTotal(collector.collectedTuples));
  collector.clear();

  // Batch 2: a lone 5 is expected to produce nothing.
  filterOp.data.process(5);
  Assert.assertEquals("number emitted tuples", 0, collector.collectedTuples.size());
  Assert.assertEquals("Total filtered value is ", 0, getTotal(collector.collectedTuples));
  collector.clear();

  // Batch 3: two 2s around a 33.
  for (int input : new int[] {2, 33, 2}) {
    filterOp.data.process(input);
  }
  Assert.assertEquals("number emitted tuples", 2, collector.collectedTuples.size());
  Assert.assertEquals("Total filtered value is ", 4, getTotal(collector.collectedTuples));
  collector.clear();

  // Batch 4: 6 and 2 alternating, four times each.
  for (int round = 0; round < 4; round++) {
    filterOp.data.process(6);
    filterOp.data.process(2);
  }
  Assert.assertEquals("number emitted tuples", 4, collector.collectedTuples.size());
  Assert.assertEquals("Total filtered value is ", 8, getTotal(collector.collectedTuples));
  collector.clear();

  // Inverse mode: 9 is not among the configured values and is expected to be emitted.
  filterOp.setInverse(true);
  filterOp.data.process(9);
  Assert.assertEquals("number emitted tuples", 1, collector.collectedTuples.size());
  Assert.assertEquals("Total filtered value is ", 9, getTotal(collector.collectedTuples));

  filterOp.endWindow();
}
Aggregations