Use of com.datatorrent.api.Partitioner.Partition in project apex-malhar by apache: class AbstractFileInputOperatorTest, method testPartitioningStateTransferFailure.
/**
 * Tests dynamic partitioning that interrupts an ongoing read.
 * - Create 4 files with 3 records each.
 * - Create a single partition, and read some records, populating pending files in the operator.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferFailure() throws Exception {
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);
  oper.setEmitBatchSize(2);
  LineByLineFileInputOperator initialState = new Kryo().copy(oper);
  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({"unchecked", "rawtypes"})
  CollectorTestSink<Object> sink = (CollectorTestSink)queryResults;
  oper.output.setSink(sink);
  int wid = 0;
  // Read some records.
  oper.setup(testMeta.context);
  for (int i = 0; i < 5; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);
  // Create partitions of the operator.
  List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState.definePartitions(partitions, new PartitioningContextImpl(null, 0));
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());
  /* Collect all operators in a list */
  List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    LineByLineFileInputOperator oi = (LineByLineFileInputOperator)p.getPartitionedInstance();
    oi.setup(testMeta.context);
    oi.output.setSink(sink);
    opers.add(oi);
  }
  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFileInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // The remaining 6 records should be emitted exactly once across the new partitions.
  Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}
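The test drives by hand the handshake that the Apex engine normally performs: a StatsListener response requests a repartition, definePartitions splits the operator, and each new physical operator resumes from the transferred state (here, the pending files and the partially read file). A condensed sketch of that sequence, reusing only the calls shown above (passing null to processStats is a test shortcut, not how the engine invokes it):

// Condensed sketch of the repartition handshake exercised by the test above.
// In a deployed DAG the engine, not user code, drives these steps.
LineByLineFileInputOperator op = new LineByLineFileInputOperator();
op.setPartitionCount(2); // request two physical partitions
StatsListener.Response rsp = op.processStats(null); // null stats: test shortcut
if (rsp.repartitionRequired) {
  List<Partition<AbstractFileInputOperator<String>>> current = Lists.newArrayList();
  current.add(new DefaultPartition<AbstractFileInputOperator<String>>(op));
  // The operator distributes its state (pending files, read offsets) across the children.
  Collection<Partition<AbstractFileInputOperator<String>>> children = op.definePartitions(current, new PartitioningContextImpl(null, 0));
}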
Use of com.datatorrent.api.Partitioner.Partition in project apex-malhar by apache: class AbstractFileInputOperatorTest, method testPartitioningStateTransfer.
/**
 * Tests dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition, and read all records, populating pending files in the operator.
 * - Split it into two operators.
 * - Try to emit records again; expected result: no records are emitted, as all files are
 *   already processed.
 * - Create another 4 files with 3 records each.
 * - Try to emit records again; expected result: 4 * 3 = 12 records emitted in total.
 */
@Test
public void testPartitioningStateTransfer() throws Exception {
  LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);
  LineByLineFileInputOperator initialState = new Kryo().copy(oper);
  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({"unchecked", "rawtypes"})
  CollectorTestSink<Object> sink = (CollectorTestSink)queryResults;
  oper.output.setSink(sink);
  int wid = 0;
  // Read all records to populate the processed list in the operator.
  oper.setup(testMeta.context);
  for (int i = 0; i < 10; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);
  // Create partitions of the operator.
  List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState.definePartitions(partitions, new PartitioningContextImpl(null, 0));
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());
  Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());
  /* Collect all operators in a list */
  List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    LineByLineFileInputOperator oi = (LineByLineFileInputOperator)p.getPartitionedInstance();
    oi.setup(testMeta.context);
    oi.output.setSink(sink);
    opers.add(oi);
  }
  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFileInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // No records should be read; all files were processed before the split.
  Assert.assertEquals("No new tuples read ", 0, sink.collectedTuples.size());
  // Add four new files with 3 records each.
  for (; file < 8; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  for (int i = 0; i < 10; i++) {
    for (AbstractFileInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // If each file is processed exactly once, the number of records emitted should be 12.
  Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
}
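The reason no record is emitted twice after the split is that each partition's scanner accepts only the files that hash to its slot, so every file, including the four added later, is read by exactly one operator. The helper below is a hypothetical sketch of such a scheme; the actual DirectoryScanner in apex-malhar implements its own variant of this idea:

// Hypothetical sketch (illustrative names, not the malhar DirectoryScanner API):
// shard files across partitions by hashing the file path.
static boolean acceptFile(String path, int partitionIndex, int partitionCount) {
  if (partitionCount <= 1) {
    return true; // a single partition reads everything
  }
  int hash = path.hashCode() & Integer.MAX_VALUE; // force a non-negative value
  return hash % partitionCount == partitionIndex; // each file maps to exactly one partition
}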
Use of com.datatorrent.api.Partitioner.Partition in project apex-core by apache: class PhysicalPlanTest, method testDefaultRepartitioning.
@Test
public void testDefaultRepartitioning() {
  List<PartitionKeys> twoBitPartitionKeys = Arrays.asList(newPartitionKeys("11", "00"), newPartitionKeys("11", "10"), newPartitionKeys("11", "01"), newPartitionKeys("11", "11"));
  GenericTestOperator operator = new GenericTestOperator();
  Set<PartitionKeys> initialPartitionKeys = Sets.newHashSet(newPartitionKeys("1", "0"), newPartitionKeys("1", "1"));
  final ArrayList<Partition<Operator>> partitions = new ArrayList<>();
  for (PartitionKeys pks : initialPartitionKeys) {
    Map<InputPort<?>, PartitionKeys> p1Keys = new HashMap<>();
    p1Keys.put(operator.inport1, pks);
    partitions.add(new DefaultPartition<Operator>(operator, p1Keys, 1, null));
  }
  ArrayList<Partition<Operator>> lowLoadPartitions = new ArrayList<>();
  for (Partition<Operator> p : partitions) {
    lowLoadPartitions.add(new DefaultPartition<>(p.getPartitionedInstance(), p.getPartitionKeys(), -1, null));
  }
  // merge to a single partition
  List<Partition<Operator>> newPartitions = Lists.newArrayList();
  Collection<Partition<Operator>> tempNewPartitions = StatelessPartitioner.repartition(lowLoadPartitions);
  newPartitions.addAll(tempNewPartitions);
  Assert.assertEquals("" + newPartitions, 1, newPartitions.size());
  Assert.assertEquals("" + newPartitions.get(0).getPartitionKeys(), 0, newPartitions.get(0).getPartitionKeys().values().iterator().next().mask);
  List<Partition<Operator>> tempList = Collections.singletonList((Partition<Operator>)new DefaultPartition<Operator>(operator, newPartitions.get(0).getPartitionKeys(), -1, null));
  tempNewPartitions = StatelessPartitioner.repartition(tempList);
  newPartitions.clear();
  newPartitions.addAll(tempNewPartitions);
  Assert.assertEquals("" + newPartitions, 1, newPartitions.size());
  // split back into two
  tempList = Collections.singletonList((Partition<Operator>)new DefaultPartition<Operator>(operator, newPartitions.get(0).getPartitionKeys(), 1, null));
  tempNewPartitions = StatelessPartitioner.repartition(tempList);
  newPartitions.clear();
  newPartitions.addAll(tempNewPartitions);
  Assert.assertEquals("" + newPartitions, 2, newPartitions.size());
  // split partitions
  tempNewPartitions = StatelessPartitioner.repartition(partitions);
  newPartitions.clear();
  newPartitions.addAll(tempNewPartitions);
  Assert.assertEquals("" + newPartitions, 4, newPartitions.size());
  Set<PartitionKeys> expectedPartitionKeys = Sets.newHashSet(twoBitPartitionKeys);
  for (Partition<?> p : newPartitions) {
    Assert.assertEquals("" + p.getPartitionKeys(), 1, p.getPartitionKeys().size());
    Assert.assertEquals("" + p.getPartitionKeys(), operator.inport1, p.getPartitionKeys().keySet().iterator().next());
    PartitionKeys pks = p.getPartitionKeys().values().iterator().next();
    expectedPartitionKeys.remove(pks);
  }
  Assert.assertTrue("" + expectedPartitionKeys, expectedPartitionKeys.isEmpty());
  // partition merge
  List<HashSet<PartitionKeys>> expectedKeysSets = Arrays.asList(Sets.newHashSet(newPartitionKeys("11", "00"), newPartitionKeys("11", "10"), newPartitionKeys("1", "1")), Sets.newHashSet(newPartitionKeys("1", "0"), newPartitionKeys("11", "01"), newPartitionKeys("11", "11")));
  for (Set<PartitionKeys> expectedKeys : expectedKeysSets) {
    List<Partition<Operator>> clonePartitions = Lists.newArrayList();
    for (PartitionKeys pks : twoBitPartitionKeys) {
      Map<InputPort<?>, PartitionKeys> p1Keys = new HashMap<>();
      p1Keys.put(operator.inport1, pks);
      int load = expectedKeys.contains(pks) ? 0 : -1;
      clonePartitions.add(new DefaultPartition<Operator>(operator, p1Keys, load, null));
    }
    tempNewPartitions = StatelessPartitioner.repartition(clonePartitions);
    newPartitions.clear();
    newPartitions.addAll(tempNewPartitions);
    Assert.assertEquals("" + newPartitions, 3, newPartitions.size());
    for (Partition<?> p : newPartitions) {
      Assert.assertEquals("" + p.getPartitionKeys(), 1, p.getPartitionKeys().size());
      Assert.assertEquals("" + p.getPartitionKeys(), operator.inport1, p.getPartitionKeys().keySet().iterator().next());
      PartitionKeys pks = p.getPartitionKeys().values().iterator().next();
      expectedKeys.remove(pks);
    }
    Assert.assertTrue("" + expectedKeys, expectedKeys.isEmpty());
  }
  // merge the two initial partitions back into a single partition
  lowLoadPartitions = Lists.newArrayList();
  for (Partition<?> p : partitions) {
    lowLoadPartitions.add(new DefaultPartition<Operator>(operator, p.getPartitionKeys(), -1, null));
  }
  tempNewPartitions = StatelessPartitioner.repartition(lowLoadPartitions);
  newPartitions.clear();
  newPartitions.addAll(tempNewPartitions);
  Assert.assertEquals("" + newPartitions, 1, newPartitions.size());
  for (Partition<?> p : newPartitions) {
    Assert.assertEquals("" + p.getPartitionKeys(), 1, p.getPartitionKeys().size());
    PartitionKeys pks = p.getPartitionKeys().values().iterator().next();
    Assert.assertEquals("" + pks, 0, pks.mask);
    Assert.assertEquals("" + pks, Sets.newHashSet(0), pks.partitions);
  }
}
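The two-bit keys asserted above follow from how a split widens the partition mask: a child keeps the parent's low bits and the siblings differ in the newly added high bit, while a merge drops the high bit again (which is why the final assertion sees mask 0 and key set {0}). A worked example of the arithmetic, in plain Java and for illustration only:

// Splitting the partition {mask = 0b1, keys = {0b0}} into two children:
int oldMask = 0b1;
int oldKey = 0b0;
int newMask = (oldMask << 1) | 1; // 0b11, i.e. the "11" in newPartitionKeys("11", ...)
int childA = oldKey; // 0b00 -> newPartitionKeys("11", "00")
int childB = oldKey | (oldMask + 1); // 0b10 -> newPartitionKeys("11", "10")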
Use of com.datatorrent.api.Partitioner.Partition in project apex-core by apache: class DefaultPartition, method assignPartitionKeys.
/**
 * Assign partition keys for the given list of partitions and port of the logical operator.
 * <p>
 * The incoming stream will be partitioned by n keys, with n the nearest power of 2 greater than or equal
 * to the number of partition instances provided. If the number of instances is not a power of 2, some of
 * the partitions will be assigned 2 keys. This logic is used for default partitioning and can be used to
 * implement {@link Partitioner}.
 *
 * @param <T> type of the partitionable object
 * @param partitions partitions to which keys are assigned
 * @param inputPort input port on which the stream is partitioned
 */
public static <T> void assignPartitionKeys(Collection<Partition<T>> partitions, InputPort<?> inputPort) {
  if (partitions.isEmpty()) {
    throw new IllegalArgumentException("partitions collection cannot be empty");
  }
  // Number of bits needed to address partitions.size() slots: ceil(log2(size)).
  int partitionBits = (Integer.numberOfLeadingZeros(0) - Integer.numberOfLeadingZeros(partitions.size() - 1));
  int partitionMask = 0;
  if (partitionBits > 0) {
    // Equivalent to -1 >>> (32 - partitionBits): Java masks int shift distances to their low
    // 5 bits, so the negative distance wraps around, leaving the low partitionBits bits set.
    partitionMask = -1 >>> (Integer.numberOfLeadingZeros(-1)) - partitionBits;
  }
  Iterator<Partition<T>> iterator = partitions.iterator();
  // Deal key values 0..partitionMask round-robin over the partitions, wrapping as needed,
  // so when the count is not a power of 2 the partitions at the front receive an extra key.
  for (int i = 0; i <= partitionMask; i++) {
    Partition<?> p;
    if (iterator.hasNext()) {
      p = iterator.next();
    } else {
      iterator = partitions.iterator();
      p = iterator.next();
    }
    PartitionKeys pks = p.getPartitionKeys().get(inputPort);
    if (pks == null) {
      p.getPartitionKeys().put(inputPort, new PartitionKeys(partitionMask, Sets.newHashSet(i)));
    } else {
      pks.partitions.add(i);
    }
  }
}
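A worked example makes the key assignment concrete: for three partitions, partitionBits is 2 (the nearest power of 2 greater than or equal to 3 is 4), the mask is 0b11, and the four key values are dealt round-robin so the first partition ends up with two keys. A short check of the arithmetic (illustrative; the production code above guards the partitionBits == 0 case separately):

int n = 3; // number of partition instances
int partitionBits = 32 - Integer.numberOfLeadingZeros(n - 1); // = 2
int partitionMask = -1 >>> (32 - partitionBits); // = 0b11
// Key values 0..3 are assigned round-robin, wrapping back to the first partition:
//   partition 0 -> {0, 3}, partition 1 -> {1}, partition 2 -> {2}
// Each tuple's (hashCode & partitionMask) therefore routes to exactly one partition.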
Use of com.datatorrent.api.Partitioner.Partition in project apex-malhar by apache: class CouchBaseInputOperatorTest, method TestCouchBaseInputOperator.
@Test
public void TestCouchBaseInputOperator() throws Exception {
  BucketConfiguration bucketConfiguration = new BucketConfiguration();
  CouchbaseConnectionFactoryBuilder cfb = new CouchbaseConnectionFactoryBuilder();
  CouchbaseMock mockCouchbase1 = createMock("default", "", bucketConfiguration);
  CouchbaseMock mockCouchbase2 = createMock("default", "", bucketConfiguration);
  mockCouchbase1.start();
  mockCouchbase1.waitForStartup();
  List<URI> uriList = new ArrayList<URI>();
  int port1 = mockCouchbase1.getHttpPort();
  logger.debug("port is {}", port1);
  mockCouchbase2.start();
  mockCouchbase2.waitForStartup();
  int port2 = mockCouchbase2.getHttpPort();
  logger.debug("port is {}", port2);
  uriList.add(new URI("http", null, "localhost", port1, "/pools", "", ""));
  connectionFactory = cfb.buildCouchbaseConnection(uriList, bucketConfiguration.name, bucketConfiguration.password);
  client = new CouchbaseClient(connectionFactory);
  CouchBaseStore store = new CouchBaseStore();
  keyList = new ArrayList<String>();
  store.setBucket(bucketConfiguration.name);
  store.setPasswordConfig(password);
  store.setPassword(bucketConfiguration.password);
  store.setUriString("localhost:" + port1 + "," + "localhost:" + port2); // one URI per mock server
  // couchbaseBucket.getCouchServers();
  AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
  attributeMap.put(DAG.APPLICATION_ID, APP_ID);
  TestInputOperator inputOperator = new TestInputOperator();
  inputOperator.setStore(store);
  inputOperator.insertEventsInTable(10);
  CollectorTestSink<Object> sink = new CollectorTestSink<Object>();
  inputOperator.outputPort.setSink(sink);
  List<Partition<AbstractCouchBaseInputOperator<String>>> partitions = Lists.newArrayList();
  Collection<Partition<AbstractCouchBaseInputOperator<String>>> newPartitions = inputOperator.definePartitions(partitions, new PartitioningContextImpl(null, 0));
  Assert.assertEquals(2, newPartitions.size());
  for (Partition<AbstractCouchBaseInputOperator<String>> p : newPartitions) {
    Assert.assertNotSame(inputOperator, p.getPartitionedInstance());
  }
  // Collect all operators in a list.
  List<AbstractCouchBaseInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractCouchBaseInputOperator<String>> p : newPartitions) {
    TestInputOperator oi = (TestInputOperator)p.getPartitionedInstance();
    oi.setServerURIString("localhost:" + port1);
    oi.setStore(store);
    oi.setup(null);
    oi.outputPort.setSink(sink);
    opers.add(oi);
    port1 = port2; // the next partition connects to the second mock server
  }
  sink.clear();
  int wid = 0;
  for (int i = 0; i < 10; i++) {
    for (AbstractCouchBaseInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  Assert.assertEquals("Tuples read should be same ", 10, sink.collectedTuples.size());
  for (AbstractCouchBaseInputOperator<String> o : opers) {
    o.teardown();
  }
  mockCouchbase1.stop();
  mockCouchbase2.stop();
}
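The port1 = port2 swap at the end of the partition loop is what spreads the two physical operators over the two mock servers. The same assignment written out explicitly, as an illustrative rewrite of that loop rather than a change to the operator API:

// Illustrative rewrite of the server assignment: one mock server per physical partition.
String[] serverUris = { "localhost:" + port1, "localhost:" + port2 };
int i = 0;
for (Partition<AbstractCouchBaseInputOperator<String>> p : newPartitions) {
  TestInputOperator oi = (TestInputOperator)p.getPartitionedInstance();
  oi.setServerURIString(serverUris[i++ % serverUris.length]); // round-robin over servers
}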