use of org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator in project apex-malhar by apache.
the class AbstractFileInputOperatorTest method testRecoveryWithPendingFile.
/**
 * Checks that a file queued directly in {@code pendingFiles} is emitted line by line
 * even though the operator's scanner has been set to {@code null}.
 *
 * <p>Five lines are written to {@code file0}. After one window containing a single
 * {@code emitTuples()} call, the sink must hold exactly those five lines, in the same
 * order in which they were written to the file.
 *
 * @throws Exception if preparing the test directory or driving the operator fails
 */
@Test
public void testRecoveryWithPendingFile() throws Exception {
  // Start from a clean test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // The set's iteration order defines both the file content and the expected output,
  // so the two stay in agreement whatever that order happens to be.
  HashSet<String> lines = Sets.newHashSet();
  for (int line = 0; line < 5; line++) {
    lines.add("f0" + "l" + line);
  }
  List<String> allLines = Lists.newArrayList();
  allLines.addAll(lines);

  File testFile = new File(testMeta.dir, "file0");
  FileUtils.write(testFile, StringUtils.join(lines, '\n'));

  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  // No scanner: the only input is the path placed in the pending queue below.
  oper.scanner = null;
  oper.pendingFiles.add(testFile.getAbsolutePath());

  // Same sink wiring as the other tests in this class; avoids an unchecked raw-type cast.
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  TestUtils.setSink(oper.output, queryResults);

  oper.setDirectory(testMeta.dir);
  oper.setup(testMeta.context);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.endWindow();
  oper.teardown();

  Assert.assertEquals("number tuples", 5, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", allLines, new ArrayList<String>(queryResults.collectedTuples));
}
use of org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator in project apex-malhar by apache.
the class AbstractFileInputOperatorTest method testIdempotencyWithMultipleEmitTuples.
/**
 * Checks that a window which spanned several {@code emitTuples()} calls produces the same
 * tuples again when the operator is set up a second time and window 0 is replayed.
 *
 * <p>Two files with two lines each are written. In the first run window 0 contains three
 * {@code emitTuples()} calls. In the second run only {@code beginWindow(0)} and
 * {@code endWindow()} are called, and the sink must again contain four tuples equal to,
 * and in the same order as, those collected in the first run.
 *
 * @throws Exception if preparing the test directory or driving the operator fails
 */
@Test
public void testIdempotencyWithMultipleEmitTuples() throws Exception {
  // Start from a clean test directory.
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  // file0 holds f0l0, f0l1 and file1 holds f1l0, f1l1.
  for (int file = 0; file < 2; file++) {
    List<String> lines = Lists.newArrayList();
    for (int line = 0; line < 2; line++) {
      lines.add("f" + file + "l" + line);
    }
    FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
  }

  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  FSWindowDataManager manager = new FSWindowDataManager();
  // Window state is kept under the test directory; the file pattern set below does not
  // match this "recovery" path.
  manager.setStatePath(testMeta.dir + "/recovery");
  oper.setWindowDataManager(manager);

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  TestUtils.setSink(oper.output, queryResults);

  oper.setDirectory(testMeta.dir);
  oper.getScanner().setFilePatternRegexp(".*file[\\d]");

  // First run: a single window with multiple emitTuples() calls.
  oper.setup(testMeta.context);
  oper.beginWindow(0);
  for (int i = 0; i < 3; i++) {
    oper.emitTuples();
  }
  oper.endWindow();
  oper.teardown();
  List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);
  queryResults.clear();

  // Idempotency part: replay window 0 without calling emitTuples().
  oper.setup(testMeta.context);
  oper.beginWindow(0);
  oper.endWindow();
  Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
  oper.teardown();
}
use of org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator in project apex-malhar by apache.
the class AbstractFileInputOperatorTest method testWithCustomScanner.
/**
 * Repartitions an operator that uses {@code MyScanner} into two partitions and checks what
 * each partition's scanner picks up.
 *
 * <p>Ten empty files are created, named {@code <index>_partition_00<random>} with the index
 * running from 0 to 9 (inclusive). According to the original description, the custom scanner
 * returns this leading index from its {@code getPartition} method ({@code MyScanner} is
 * defined outside this method).
 *
 * <p>NOTE(review): the original description stated that each partition should read 5 files,
 * but the assertion below expects 6 paths per partition -- confirm which is intended.
 *
 * @throws Exception if preparing the test directory or scanning fails
 */
@Test
public void testWithCustomScanner() throws Exception {
  LineByLineFileInputOperator operator = new LineByLineFileInputOperator();
  operator.setScanner(new MyScanner());
  operator.getScanner().setFilePatternRegexp(".*partition_([\\d]*)");
  operator.setDirectory(new File(testMeta.dir).getAbsolutePath());

  Random random = new Random();
  Path dirPath = new Path(new File(testMeta.dir).getAbsolutePath());
  // Wipe the directory, then populate it with the ten index-prefixed files.
  FileContext.getLocalFSFileContext().delete(dirPath, true);
  int index = 0;
  while (index < 10) {
    String fileName = index + "_partition_00" + random.nextInt(100);
    FileUtils.write(new File(testMeta.dir, fileName), "");
    index++;
  }

  List<Partition<AbstractFileInputOperator<String>>> initial = Lists.newArrayList();
  initial.add(new DefaultPartition<AbstractFileInputOperator<String>>(operator));
  Collection<Partition<AbstractFileInputOperator<String>>> repartitioned =
      operator.definePartitions(initial, new PartitioningContextImpl(null, 2));
  Assert.assertEquals(2, repartitioned.size());
  // partitioned() wasn't called, so the original operator still reports one partition
  Assert.assertEquals(1, operator.getCurrentPartitions());

  for (Partition<AbstractFileInputOperator<String>> partition : repartitioned) {
    AbstractFileInputOperator<String> instance = partition.getPartitionedInstance();
    // Each new partition must be a distinct operator with its own scanner.
    Assert.assertNotSame(operator, instance);
    Assert.assertNotSame(operator.getScanner(), instance.getScanner());

    Set<String> consumed = Sets.newHashSet();
    FileSystem localFs = FileSystem.getLocal(new Configuration(false));
    LinkedHashSet<Path> files = instance.getScanner().scan(localFs, dirPath, consumed);
    Assert.assertEquals("partition " + files, 6, files.size());
  }
}
use of org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator in project apex-malhar by apache.
the class AbstractFileInputOperatorTest method testPartitioning.
/**
 * Repartitions an operator with the default scanner into two partitions and checks what
 * each partition's scanner picks up.
 *
 * <p>Four empty files named {@code partition000} through {@code partition003} are created in
 * the test directory. After {@code definePartitions} returns two partitions, each partition
 * must be a distinct operator instance with its own scanner, and a scan of the directory
 * with an empty consumed set must return 3 paths.
 *
 * <p>NOTE(review): why each partition is expected to see 3 paths when only 4 files are
 * created is not evident from this method -- confirm against the scanner implementation.
 *
 * @throws Exception if preparing the test directory or scanning fails
 */
@Test
public void testPartitioning() throws Exception {
  LineByLineFileInputOperator operator = new LineByLineFileInputOperator();
  operator.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  operator.setDirectory(new File(testMeta.dir).getAbsolutePath());

  Path dirPath = new Path(new File(testMeta.dir).getAbsolutePath());
  // Wipe the directory, then create the four input files.
  FileContext.getLocalFSFileContext().delete(dirPath, true);
  int index = 0;
  while (index < 4) {
    String fileName = "partition00" + index;
    FileUtils.write(new File(testMeta.dir, fileName), "");
    index++;
  }

  List<Partition<AbstractFileInputOperator<String>>> initial = Lists.newArrayList();
  initial.add(new DefaultPartition<AbstractFileInputOperator<String>>(operator));
  Collection<Partition<AbstractFileInputOperator<String>>> repartitioned =
      operator.definePartitions(initial, new PartitioningContextImpl(null, 2));
  Assert.assertEquals(2, repartitioned.size());
  // partitioned() wasn't called, so the original operator still reports one partition
  Assert.assertEquals(1, operator.getCurrentPartitions());

  for (Partition<AbstractFileInputOperator<String>> partition : repartitioned) {
    AbstractFileInputOperator<String> instance = partition.getPartitionedInstance();
    // Each new partition must be a distinct operator with its own scanner.
    Assert.assertNotSame(operator, instance);
    Assert.assertNotSame(operator.getScanner(), instance.getScanner());

    Set<String> consumed = Sets.newHashSet();
    FileSystem localFs = FileSystem.getLocal(new Configuration(false));
    LinkedHashSet<Path> files = instance.getScanner().scan(localFs, dirPath, consumed);
    Assert.assertEquals("partition " + files, 3, files.size());
  }
}
use of org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator in project apex-malhar by apache.
the class AbstractFileInputOperatorTest method testWindowDataManagerPartitioning.
/**
 * Checks how the window data manager is handed out when an operator is repartitioned.
 *
 * <p>An operator with {@code operatorId} 7 and an {@code FSWindowDataManager} is repartitioned
 * from one partition into two. Each new partition must carry an {@code FSWindowDataManager},
 * exactly one of them must report a non-null set of deleted operators, and that set must
 * equal {@code {7}}.
 *
 * @throws Exception if preparing the test directory or partitioning fails
 */
@Test
public void testWindowDataManagerPartitioning() throws Exception {
  LineByLineFileInputOperator operator = new LineByLineFileInputOperator();
  operator.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  operator.setDirectory(new File(testMeta.dir).getAbsolutePath());
  operator.setWindowDataManager(new FSWindowDataManager());
  operator.operatorId = 7;

  Path dirPath = new Path(new File(testMeta.dir).getAbsolutePath());
  // Wipe the directory, then create the four input files.
  FileContext.getLocalFSFileContext().delete(dirPath, true);
  int index = 0;
  while (index < 4) {
    String fileName = "partition00" + index;
    FileUtils.write(new File(testMeta.dir, fileName), "");
    index++;
  }

  List<Partition<AbstractFileInputOperator<String>>> initial = Lists.newArrayList();
  initial.add(new DefaultPartition<AbstractFileInputOperator<String>>(operator));
  Collection<Partition<AbstractFileInputOperator<String>>> repartitioned =
      operator.definePartitions(initial, new PartitioningContextImpl(null, 2));
  Assert.assertEquals(2, repartitioned.size());
  Assert.assertEquals(1, operator.getCurrentPartitions());

  // Collect the window data manager of every new partition.
  List<FSWindowDataManager> managers = Lists.newLinkedList();
  for (Partition<AbstractFileInputOperator<String>> partition : repartitioned) {
    AbstractFileInputOperator<String> instance = partition.getPartitionedInstance();
    managers.add((FSWindowDataManager) instance.getWindowDataManager());
  }
  Assert.assertEquals("count of storage managers", 2, managers.size());

  // Find the manager(s) that carry a deleted-operators set.
  int managersWithDeletions = 0;
  FSWindowDataManager deleteManager = null;
  for (FSWindowDataManager candidate : managers) {
    if (candidate.getDeletedOperators() == null) {
      continue;
    }
    managersWithDeletions++;
    deleteManager = candidate;
  }

  Assert.assertEquals("count of delete managers", 1, managersWithDeletions);
  Assert.assertNotNull("deleted operators manager", deleteManager);
  Assert.assertEquals("deleted operators", Sets.newHashSet(7), deleteManager.getDeletedOperators());
}
Aggregations