Use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.
From class AbstractFileInputOperatorTest, method testPartitioningStateTransferFailure:
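Every snippet on this page drives repartitioning through StatelessPartitionerTest.PartitioningContextImpl, a small test helper implementing Partitioner.PartitioningContext whose body is not shown here. The following is a minimal sketch, assuming the helper does nothing more than store its two constructor arguments (the input ports, passed as null in these tests, and the required partition count); types are from com.datatorrent.api, and the real class lives in StatelessPartitionerTest.

  // Minimal sketch of the test helper; assumed shape, see the lead-in above.
  public static class PartitioningContextImpl implements Partitioner.PartitioningContext {
    final List<Operator.InputPort<?>> inputPorts;
    final int parallelPartitionCount;

    public PartitioningContextImpl(List<Operator.InputPort<?>> inputPorts, int parallelPartitionCount) {
      this.inputPorts = inputPorts;
      this.parallelPartitionCount = parallelPartitionCount;
    }

    @Override
    public int getParallelPartitionCount() {
      // The state-transfer tests pass 0 here and rely on setPartitionCount(2);
      // testWithCustomScanner and testPartitioning pass 2 directly.
      return parallelPartitionCount;
    }

    @Override
    public List<Operator.InputPort<?>> getInputPorts() {
      return inputPorts;
    }
  }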
/**
 * Tests dynamic partitioning that interrupts an ongoing read.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read some records, populating the pending files in the operator.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferFailure() throws Exception {
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);
  oper.setEmitBatchSize(2);
  LineByLineFileInputOperator initialState = new Kryo().copy(oper);
  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({"unchecked", "rawtypes"})
  CollectorTestSink<Object> sink = (CollectorTestSink)queryResults;
  oper.output.setSink(sink);
  int wid = 0;
  // Read some of the records (6 of the 12 written, per the assertion below).
  oper.setup(testMeta.context);
  for (int i = 0; i < 5; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);
  // Create partitions of the operator.
  List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState.definePartitions(partitions, new PartitioningContextImpl(null, 0));
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());
  // Collect all operators in a list.
  List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    LineByLineFileInputOperator oi = (LineByLineFileInputOperator)p.getPartitionedInstance();
    oi.setup(testMeta.context);
    oi.output.setSink(sink);
    opers.add(oi);
  }
  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFileInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // The 6 remaining records should be read by the new partitions, each exactly once.
  Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}
Use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.
From class AbstractFileInputOperatorTest, method testPartitioningStateTransfer:
/**
 * Tests dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read all records, populating the processed files in the operator.
 * - Split it into two operators.
 * - Try to emit records again; the expected result is that no record is emitted, since all
 *   files are already processed.
 * - Create another 4 files with 3 records each.
 * - Try to emit records again; the expected total is 4 * 3 = 12 records.
 */
@Test
public void testPartitioningStateTransfer() throws Exception {
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);
  LineByLineFileInputOperator initialState = new Kryo().copy(oper);
  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({"unchecked", "rawtypes"})
  CollectorTestSink<Object> sink = (CollectorTestSink)queryResults;
  oper.output.setSink(sink);
  int wid = 0;
  // Read all records to populate the processed list in the operator.
  oper.setup(testMeta.context);
  for (int i = 0; i < 10; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);
  // Create partitions of the operator.
  List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState.definePartitions(partitions, new PartitioningContextImpl(null, 0));
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());
  // Collect all operators in a list.
  List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    LineByLineFileInputOperator oi = (LineByLineFileInputOperator)p.getPartitionedInstance();
    oi.setup(testMeta.context);
    oi.output.setSink(sink);
    opers.add(oi);
  }
  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFileInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // No record should be read: all files were already processed before the split.
  Assert.assertEquals("No new tuples read ", 0, sink.collectedTuples.size());
  // Add four new files with 3 records each.
  for (; file < 8; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  for (int i = 0; i < 10; i++) {
    for (AbstractFileInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // If every file is processed exactly once, the number of records emitted should be 12.
  Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
}
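No tuples appear after the split because definePartitions hands each new operator only the files its own scanner accepts, covering both processed and pending state, so no file is scanned twice. The standalone sketch below illustrates that idea with the same hash-modulo rule the default DirectoryScanner uses to divide files between partitions; it is an illustration, not the operator's actual code, and the names RedistributeSketch and redistribute are hypothetical.

  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.List;

  public class RedistributeSketch {
    // Hypothetical illustration: bucket each file path by hash modulo the
    // partition count, so every path lands in exactly one partition.
    static List<List<String>> redistribute(List<String> pendingFiles, int partitionCount) {
      List<List<String>> buckets = new ArrayList<List<String>>();
      for (int i = 0; i < partitionCount; i++) {
        buckets.add(new ArrayList<String>());
      }
      for (String path : pendingFiles) {
        int idx = (path.hashCode() % partitionCount + partitionCount) % partitionCount;
        buckets.get(idx).add(path);
      }
      return buckets;
    }

    public static void main(String[] args) {
      List<String> files = Arrays.asList("partition000", "partition001", "partition002", "partition003");
      // Each file appears in exactly one of the two buckets, never both.
      System.out.println(redistribute(files, 2));
    }
  }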
Use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.
From class AbstractFileInputOperatorTest, method testPartitioningStateTransferInterrupted:
/**
 * Tests dynamic partitioning while a read is in progress.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read some records, populating the pending files in the operator.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferInterrupted() throws Exception {
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);
  oper.setEmitBatchSize(2);
  LineByLineFileInputOperator initialState = new Kryo().copy(oper);
  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({"unchecked", "rawtypes"})
  CollectorTestSink<Object> sink = (CollectorTestSink)queryResults;
  oper.output.setSink(sink);
  int wid = 0;
  // Read some of the records (6 of the 12 written, per the assertion below).
  oper.setup(testMeta.context);
  for (int i = 0; i < 5; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);
  // Create partitions of the operator.
  List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState.definePartitions(partitions, new PartitioningContextImpl(null, 0));
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());
  // Collect all operators in a list.
  List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    LineByLineFileInputOperator oi = (LineByLineFileInputOperator)p.getPartitionedInstance();
    oi.setup(testMeta.context);
    oi.output.setSink(sink);
    opers.add(oi);
  }
  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFileInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}
Use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.
From class AbstractFileInputOperatorTest, method testWithCustomScanner:
/**
 * Partitions the operator into 2.
 * Creates ten files with the index of the file at the start of the name, i.e. 1_file, 2_file, etc.
 * The scanner returns this index from its getPartition method.
 * Each partition should read 5 files, as file indexes run from 0 to 9 (including 0 and 9).
 * A sketch of such a scanner follows this test.
 * @throws Exception
 */
@Test
public void testWithCustomScanner() throws Exception {
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.setScanner(new MyScanner());
  oper.getScanner().setFilePatternRegexp(".*partition_([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  Random rand = new Random();
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  // Create ten empty files named "<index>_partition_0NN" with a random suffix.
  for (int file = 0; file < 10; file++) {
    FileUtils.write(new File(testMeta.dir, file + "_partition_00" + rand.nextInt(100)), "");
  }
  List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
  Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions, new PartitioningContextImpl(null, 2));
  Assert.assertEquals(2, newPartitions.size());
  // partitioned() wasn't called, so the original operator still reports one partition.
  Assert.assertEquals(1, oper.getCurrentPartitions());
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    Assert.assertNotSame(oper, p.getPartitionedInstance());
    Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
    Set<String> consumed = Sets.newHashSet();
    LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner().scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
    Assert.assertEquals("partition " + files, 6, files.size());
  }
}
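MyScanner is defined elsewhere in the test class and is not reproduced on this page. Below is a plausible sketch, assuming it overrides DirectoryScanner's protected acceptFile hook and that the partition index/count accessors shown exist on the scanner; the real body may differ.

  // Hypothetical sketch: files are named "<index>_partition_0NN", so route each
  // file by its leading numeric index instead of the default path hash code.
  static class MyScanner extends AbstractFileInputOperator.DirectoryScanner {
    @Override
    protected boolean acceptFile(String filePathStr) {
      if (getPartitionCount() <= 1) {
        return super.acceptFile(filePathStr);
      }
      // Extract the index that prefixes the file name, e.g. "3" from "3_partition_042".
      String name = filePathStr.substring(filePathStr.lastIndexOf('/') + 1);
      int index = Integer.parseInt(name.substring(0, name.indexOf('_')));
      return index % getPartitionCount() == getPartitionIndex();
    }
  }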
Use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.
From class AbstractFileInputOperatorTest, method testPartitioning:
@Test
public void testPartitioning() throws Exception {
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  for (int file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
  }
  List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
  Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions, new PartitioningContextImpl(null, 2));
  Assert.assertEquals(2, newPartitions.size());
  // partitioned() wasn't called
  Assert.assertEquals(1, oper.getCurrentPartitions());
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    Assert.assertNotSame(oper, p.getPartitionedInstance());
    Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
    Set<String> consumed = Sets.newHashSet();
    LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner().scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
    Assert.assertEquals("partition " + files, 3, files.size());
  }
}
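All five snippets repeat one handshake: wrap the current operator in a DefaultPartition, call definePartitions with a PartitioningContextImpl, and, once the new partitions are deployed, report them back through partitioned() so getCurrentPartitions() reflects the split. The helper below is a hypothetical utility (the name repartition is illustrative) that condenses the pattern using only the calls seen above.

  // Hypothetical utility condensing the repartition handshake from the tests above.
  static Collection<Partition<AbstractFileInputOperator<String>>> repartition(
      AbstractFileInputOperator<String> current, int requiredPartitions) {
    List<Partition<AbstractFileInputOperator<String>>> old = Lists.newArrayList();
    old.add(new DefaultPartition<AbstractFileInputOperator<String>>(current));
    Collection<Partition<AbstractFileInputOperator<String>>> fresh =
        current.definePartitions(old, new PartitioningContextImpl(null, requiredPartitions));
    // Simulate deployment: number the new partitions and notify the operator.
    Map<Integer, Partition<AbstractFileInputOperator<String>>> byId = Maps.newHashMap();
    for (Partition<AbstractFileInputOperator<String>> p : fresh) {
      byId.put(byId.size(), p);
    }
    current.partitioned(byId);
    return fresh;
  }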