Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
In the class AbstractFileInputOperatorTest, method testRecoveryWithCurrentFile.
/**
 * Recovery with a partially read current file.
 *
 * <p>The operator is created with its scanner cleared, {@code currentFile} pointing at the
 * test file and {@code offset} set to 1. After one window the test expects every line of
 * the file except the first one on the output port.
 */
@Test
public void testRecoveryWithCurrentFile() throws Exception {
  // Start from a clean test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // Build the fixture in an ordered list: the same sequence is written to the file and used as
  // the expected output, so the line skipped by offset = 1 is always "f0l0" instead of whatever
  // element a hash-based set happens to iterate first.
  List<String> allLines = Lists.newArrayList();
  for (int line = 0; line < 5; line++) {
    allLines.add("f0" + "l" + line);
  }
  File testFile = new File(testMeta.dir, "file0");
  FileUtils.write(testFile, StringUtils.join(allLines, '\n'));

  // Pre-populate the operator state as if it had been restored in the middle of file0.
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.scanner = null;
  oper.currentFile = testFile.getAbsolutePath();
  oper.offset = 1;

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);
  oper.setDirectory(testMeta.dir);

  // Single window: setup -> beginWindow -> emitTuples -> endWindow -> teardown.
  oper.setup(testMeta.context);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.endWindow();
  oper.teardown();

  Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", allLines.subList(1, allLines.size()), new ArrayList<String>(queryResults.collectedTuples));
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
In the class AbstractFileInputOperatorTest, method testIdempotency.
/**
 * Idempotent replay across several windows.
 *
 * <p>Two files with two lines each are read over windows 0..2 while an
 * {@link FSWindowDataManager} keeps its state under {@code <test dir>/recovery}. The same
 * operator instance is then set up again and windows 0..2 are replayed with only
 * {@code beginWindow}/{@code endWindow} (no {@code emitTuples}); the test expects the replay
 * to produce the same four tuples as the first run.
 */
@Test
public void testIdempotency() throws Exception {
  // Start from a clean test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // Fixture: file0 and file1, each holding the lines "f<file>l0" and "f<file>l1".
  for (int file = 0; file < 2; file++) {
    List<String> lines = Lists.newArrayList();
    for (int line = 0; line < 2; line++) {
      lines.add("f" + file + "l" + line);
    }
    FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
  }

  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  FSWindowDataManager manager = new FSWindowDataManager();
  manager.setStatePath(testMeta.dir + "/recovery");
  oper.setWindowDataManager(manager);

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  TestUtils.setSink(oper.output, queryResults);
  oper.setDirectory(testMeta.dir);
  // Only pick up the fixture files, not anything written below the recovery path.
  oper.getScanner().setFilePatternRegexp(".*file[\\d]");

  // First run: three full windows.
  oper.setup(testMeta.context);
  for (long wid = 0; wid < 3; wid++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
  }
  oper.teardown();

  List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);
  queryResults.clear();

  // idempotency part: replay the same window ids without asking for new tuples
  oper.setup(testMeta.context);
  for (long wid = 0; wid < 3; wid++) {
    oper.beginWindow(wid);
    oper.endWindow();
  }

  Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
  oper.teardown();
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
In the class AbstractFileInputOperatorTest, method testRecoveryWithPendingFile.
/**
 * Recovery with a file waiting in the pending queue.
 *
 * <p>The operator is created with its scanner cleared and the test file added to
 * {@code pendingFiles}. After one window the test expects all five lines of that file on
 * the output port, in file order.
 */
@Test
public void testRecoveryWithPendingFile() throws Exception {
  // Start from a clean test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // Build the fixture in an ordered list so the file contents and the expected tuple sequence
  // are well defined rather than following the iteration order of a hash-based set.
  List<String> allLines = Lists.newArrayList();
  for (int line = 0; line < 5; line++) {
    allLines.add("f0" + "l" + line);
  }
  File testFile = new File(testMeta.dir, "file0");
  FileUtils.write(testFile, StringUtils.join(allLines, '\n'));

  // Pre-populate the operator state as if file0 had been queued but not yet opened.
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.scanner = null;
  oper.pendingFiles.add(testFile.getAbsolutePath());

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);
  oper.setDirectory(testMeta.dir);

  // Single window: setup -> beginWindow -> emitTuples -> endWindow -> teardown.
  oper.setup(testMeta.context);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.endWindow();
  oper.teardown();

  Assert.assertEquals("number tuples", 5, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", allLines, new ArrayList<String>(queryResults.collectedTuples));
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
In the class AbstractFileInputOperatorTest, method testEmptyDirectory.
/**
 * Directory bookkeeping when most directories are empty.
 *
 * <p>Creates the layout {@code /a}, {@code /b}, {@code /b/c} (all empty) and {@code /d}
 * (holding one five-line file) below the test directory, runs three windows and expects
 * {@code oper.dirPaths} to equal the canonical paths of the root plus those four directories.
 */
@Test
public void testEmptyDirectory() throws Exception {
  // Start from a clean test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // Expected directory set, collected while the tree is being created.
  Set<String> dPaths = Sets.newHashSet();
  dPaths.add(new File(testMeta.dir).getCanonicalPath());

  String subdir01 = "/a";
  dPaths.add(new File(testMeta.dir + subdir01).getCanonicalPath());
  FileUtils.forceMkdir((new File(testMeta.dir + subdir01)));

  String subdir02 = "/b";
  dPaths.add(new File(testMeta.dir + subdir02).getCanonicalPath());
  FileUtils.forceMkdir(new File(testMeta.dir + subdir02));

  String subdir03 = subdir02 + "/c";
  dPaths.add(new File(testMeta.dir + subdir03).getCanonicalPath());
  FileUtils.forceMkdir(new File(testMeta.dir + subdir03));

  // The only directory with content: /d/file0. The line contents are never asserted on;
  // the file merely has to exist and be non-empty.
  String subdir04 = "/d";
  List<String> lines = Lists.newArrayList();
  for (int line = 0; line < 5; line++) {
    lines.add("f0" + "l" + line);
  }
  File testFile = new File(testMeta.dir + subdir04, "file0");
  dPaths.add(new File(testMeta.dir + subdir04).getCanonicalPath());
  FileUtils.write(testFile, StringUtils.join(lines, '\n'));

  LineOperator oper = new LineOperator();
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);

  // The emitted tuples are not inspected; the sink is attached to the output port before
  // the operator starts emitting.
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  // Drive three consecutive windows (ids 0, 1, 2) before checking the recorded directories.
  oper.setup(testMeta.context);
  for (int wid = 0; wid < 3; wid++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
  }
  // Tear the operator down like the other tests in this class do, so it is not left set up.
  oper.teardown();

  Assert.assertEquals("Size", 5, oper.dirPaths.size());
  // assertEquals reports both sets when they differ, unlike assertTrue(a.equals(b)).
  Assert.assertEquals("Checking Sets", dPaths, oper.dirPaths);
}
Use of org.apache.apex.malhar.lib.testbench.CollectorTestSink in the apex-malhar project by Apache.
In the class AbstractFileInputOperatorTest, method testIdempotencyWithMultipleEmitTuples.
/**
 * Idempotent replay when one window contains several {@code emitTuples} calls.
 *
 * <p>Two files with two lines each are read inside a single window (id 0) using three
 * {@code emitTuples} calls, with an {@link FSWindowDataManager} keeping its state under
 * {@code <test dir>/recovery}. The same operator instance is then set up again and window 0
 * is replayed with only {@code beginWindow}/{@code endWindow}; the test expects the replay to
 * produce the same four tuples as the first run.
 */
@Test
public void testIdempotencyWithMultipleEmitTuples() throws Exception {
  // Start from a clean test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // Fixture: file0 and file1, each holding the lines "f<file>l0" and "f<file>l1".
  for (int file = 0; file < 2; file++) {
    List<String> lines = Lists.newArrayList();
    for (int line = 0; line < 2; line++) {
      lines.add("f" + file + "l" + line);
    }
    FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
  }

  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  FSWindowDataManager manager = new FSWindowDataManager();
  manager.setStatePath(testMeta.dir + "/recovery");
  oper.setWindowDataManager(manager);

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  TestUtils.setSink(oper.output, queryResults);
  oper.setDirectory(testMeta.dir);
  // Only pick up the fixture files, not anything written below the recovery path.
  oper.getScanner().setFilePatternRegexp(".*file[\\d]");

  // First run: one window with three emitTuples calls.
  oper.setup(testMeta.context);
  oper.beginWindow(0);
  for (int i = 0; i < 3; i++) {
    oper.emitTuples();
  }
  oper.endWindow();
  oper.teardown();

  List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);
  queryResults.clear();

  // idempotency part: replay window 0 without asking for new tuples
  oper.setup(testMeta.context);
  oper.beginWindow(0);
  oper.endWindow();

  Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
  oper.teardown();
}
Aggregations