Use of org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator in the apex-malhar project by Apache.
Class AbstractFileInputOperatorTest, method testStateWithIdempotency.
/**
 * Exercises a {@code LineByLineFileInputOperator} that is backed by an
 * {@code FSWindowDataManager} across a teardown/setup cycle.
 *
 * <p>Three two-line files are written into {@code testMeta.dir} and consumed over windows 0-3.
 * The operator is then torn down, its pending/failed/unfinished collections are seeded with the
 * absolute paths of those files, and windows 0-3 are run a second time (begin/end only, no
 * {@code emitTuples}) after a fresh {@code setup}. The test finally asserts that none of the three
 * collections holds an entry equal to the bare names {@code file0}, {@code file1}, {@code file2}.
 *
 * @throws Exception if deleting the test directory, writing the fixture files, or any operator
 *         lifecycle call fails
 */
@Test
public void testStateWithIdempotency() throws Exception {
  // Start from an empty test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // Write file0..file2, each holding the two lines "f<file>l0" and "f<file>l1".
  for (int file = 0; file < 3; file++) {
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 2; line++) {
      lines.add("f" + file + "l" + line);
    }
    FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
  }

  // Operator under test, with its window state stored under <test dir>/recovery.
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  FSWindowDataManager manager = new FSWindowDataManager();
  manager.setStatePath(testMeta.dir + "/recovery");
  oper.setWindowDataManager(manager);

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  oper.setDirectory(testMeta.dir);
  oper.getScanner().setFilePatternRegexp(".*file[\\d]");

  // First pass: consume the files over four windows.
  oper.setup(testMeta.context);
  for (long wid = 0; wid < 4; wid++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
  }
  oper.teardown();
  sink.clear();

  // idempotency part
  // Seed the operator's bookkeeping with files from the first pass, then run the same
  // window ids again; presumably these windows are replayed from the saved state - confirm.
  oper.pendingFiles.add(new File(testMeta.dir, "file0").getAbsolutePath());
  oper.failedFiles.add(new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file1").getAbsolutePath(), 0));
  oper.unfinishedFiles.add(new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file2").getAbsolutePath(), 0));

  oper.setup(testMeta.context);
  for (long wid = 0; wid < 4; wid++) {
    oper.beginWindow(wid);
    oper.endWindow();
  }

  // NOTE(review): the collections above were seeded with absolute paths, yet these checks compare
  // against the bare names "file0"/"file1"/"file2". As written they look unable to fail. Tightening
  // them needs the exact path format the operator stores for processed files - confirm, then
  // compare against that value instead.
  Assert.assertFalse("pending state", oper.pendingFiles.contains("file0"));
  for (AbstractFileInputOperator.FailedFile failedFile : oper.failedFiles) {
    Assert.assertFalse("failed state", failedFile.path.equals("file1"));
  }
  for (AbstractFileInputOperator.FailedFile unfinishedFile : oper.unfinishedFiles) {
    Assert.assertFalse("unfinished state", unfinishedFile.path.equals("file2"));
  }
  oper.teardown();
}
Use of org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator in the apex-malhar project by Apache.
Class AbstractFileInputOperatorTest, method checkSubDir.
/**
 * Writes two files at increasing directory depth below {@code testMeta.dir}
 * ({@code /depth_0/file0} and {@code /depth_0/depth_1/file1}, two lines each), runs a
 * {@code LineByLineFileInputOperator} over the directory for three windows, and checks what
 * was emitted.
 *
 * @param recursive value passed to the scanner's {@code setRecursive}; when {@code true} all four
 *        written lines are expected, otherwise no tuples are expected
 * @throws Exception if preparing the directory, writing the files, or running the operator fails
 */
private void checkSubDir(boolean recursive) throws Exception {
  // Wipe whatever a previous run left in the test directory.
  File rootDir = new File(testMeta.dir);
  Path rootPath = new Path(rootDir.getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(rootPath, true);

  // Each iteration descends one directory level and drops a two-line file there.
  HashSet<String> writtenLines = Sets.newHashSet();
  String nestedPath = "";
  for (int depth = 0; depth < 2; depth++) {
    nestedPath = nestedPath + String.format("/depth_%d", depth);

    HashSet<String> fileLines = Sets.newHashSet();
    for (int lineNo = 0; lineNo < 2; lineNo++) {
      fileLines.add("f" + depth + "l" + lineNo);
    }
    writtenLines.addAll(fileLines);

    File target = new File(testMeta.dir + nestedPath, "file" + depth);
    String content = StringUtils.join(fileLines, '\n');
    FileUtils.write(target, content);
  }

  // Wire the operator's output into a collecting sink.
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  oper.setDirectory(testMeta.dir);
  oper.getScanner().setFilePatternRegexp("((?!target).)*file[\\d]");
  oper.getScanner().setRecursive(recursive);

  oper.setup(testMeta.context);
  long windowId = 0;
  while (windowId < 3) {
    oper.beginWindow(windowId);
    oper.emitTuples();
    oper.endWindow();
    windowId++;
  }
  oper.teardown();

  // With recursion every written line is expected; without it, nothing is.
  HashSet<String> expectedLines;
  int expectedNumTuples;
  if (recursive) {
    expectedLines = writtenLines;
    expectedNumTuples = 4;
  } else {
    expectedLines = new HashSet<String>();
    expectedNumTuples = 0;
  }

  Assert.assertEquals("number tuples", expectedNumTuples, queryResults.collectedTuples.size());
  HashSet<String> actualLines = new HashSet<String>(queryResults.collectedTuples);
  Assert.assertEquals("lines", expectedLines, actualLines);
}
Aggregations