use of com.datatorrent.api.Partitioner.PartitionKeys in project apex-core by apache.
the class PhysicalPlanTest method testDefaultRepartitioning.
/**
 * Calls {@code StatelessPartitioner.repartition} directly on hand-built partitions of a single
 * {@code GenericTestOperator} and checks the number of partitions returned as well as the
 * partition keys attached to {@code inport1}.
 *
 * <p>Scenarios, in order: two partitions with load -1 are expected to merge into one; a single
 * partition with load -1 is expected to stay single; a single partition with load 1 is expected
 * to split into two; two partitions with load 1 are expected to split into four; four partitions
 * with mixed load 0 / -1 are expected to end up as three; and finally the two-partition merge is
 * checked again including the resulting mask and partition set.
 */
@Test
public void testDefaultRepartitioning() {
  final List<PartitionKeys> fourWayKeys = Arrays.asList(
      newPartitionKeys("11", "00"),
      newPartitionKeys("11", "10"),
      newPartitionKeys("11", "01"),
      newPartitionKeys("11", "11"));
  final GenericTestOperator operator = new GenericTestOperator();
  final Set<PartitionKeys> twoWayKeys = Sets.newHashSet(newPartitionKeys("1", "0"), newPartitionKeys("1", "1"));

  // starting point: one partition per two-way key, each created with load 1
  final List<Partition<Operator>> loadedPartitions = new ArrayList<>();
  for (PartitionKeys key : twoWayKeys) {
    Map<InputPort<?>, PartitionKeys> portKeys = new HashMap<>();
    portKeys.put(operator.inport1, key);
    loadedPartitions.add(new DefaultPartition<Operator>(operator, portKeys, 1, null));
  }

  // same instances and keys, but created with load -1
  List<Partition<Operator>> idlePartitions = new ArrayList<>();
  for (Partition<Operator> loaded : loadedPartitions) {
    idlePartitions.add(new DefaultPartition<Operator>(loaded.getPartitionedInstance(), loaded.getPartitionKeys(), -1, null));
  }

  // merge to single partition
  List<Partition<Operator>> result = new ArrayList<>(StatelessPartitioner.repartition(idlePartitions));
  Assert.assertEquals("" + result, 1, result.size());
  Assert.assertEquals("" + result.get(0).getPartitionKeys(), 0, result.get(0).getPartitionKeys().values().iterator().next().mask);

  // a lone partition with load -1 is expected to remain a lone partition
  List<Partition<Operator>> lonePartition = Collections.<Partition<Operator>>singletonList(
      new DefaultPartition<Operator>(operator, result.get(0).getPartitionKeys(), -1, null));
  result = new ArrayList<>(StatelessPartitioner.repartition(lonePartition));
  Assert.assertEquals("" + result, 1, result.size());

  // split back into two
  lonePartition = Collections.<Partition<Operator>>singletonList(
      new DefaultPartition<Operator>(operator, result.get(0).getPartitionKeys(), 1, null));
  result = new ArrayList<>(StatelessPartitioner.repartition(lonePartition));
  Assert.assertEquals("" + result, 2, result.size());

  // split partitions
  result = new ArrayList<>(StatelessPartitioner.repartition(loadedPartitions));
  Assert.assertEquals("" + result, 4, result.size());
  Set<PartitionKeys> notYetSeen = Sets.newHashSet(fourWayKeys);
  for (Partition<?> split : result) {
    Assert.assertEquals("" + split.getPartitionKeys(), 1, split.getPartitionKeys().size());
    Assert.assertEquals("" + split.getPartitionKeys(), operator.inport1, split.getPartitionKeys().keySet().iterator().next());
    notYetSeen.remove(split.getPartitionKeys().values().iterator().next());
  }
  Assert.assertTrue("" + notYetSeen, notYetSeen.isEmpty());

  // partition merge
  List<HashSet<PartitionKeys>> expectedKeysSets = Arrays.asList(Sets.newHashSet(newPartitionKeys("11", "00"), newPartitionKeys("11", "10"), newPartitionKeys("1", "1")), Sets.newHashSet(newPartitionKeys("1", "0"), newPartitionKeys("11", "01"), newPartitionKeys("11", "11")));
  for (Set<PartitionKeys> expectedKeys : expectedKeysSets) {
    // four-way partitions: load 0 for keys expected to survive as-is, load -1 for the rest
    List<Partition<Operator>> mixedLoad = Lists.newArrayList();
    for (PartitionKeys fourWayKey : fourWayKeys) {
      Map<InputPort<?>, PartitionKeys> portKeys = new HashMap<>();
      portKeys.put(operator.inport1, fourWayKey);
      int load;
      if (expectedKeys.contains(fourWayKey)) {
        load = 0;
      } else {
        load = -1;
      }
      mixedLoad.add(new DefaultPartition<Operator>(operator, portKeys, load, null));
    }

    result = new ArrayList<>(StatelessPartitioner.repartition(mixedLoad));
    Assert.assertEquals("" + result, 3, result.size());
    for (Partition<?> merged : result) {
      Assert.assertEquals("" + merged.getPartitionKeys(), 1, merged.getPartitionKeys().size());
      Assert.assertEquals("" + merged.getPartitionKeys(), operator.inport1, merged.getPartitionKeys().keySet().iterator().next());
      expectedKeys.remove(merged.getPartitionKeys().values().iterator().next());
    }
    Assert.assertTrue("" + expectedKeys, expectedKeys.isEmpty());
  }

  // merge 2 into single partition
  idlePartitions = Lists.newArrayList();
  for (Partition<?> loaded : loadedPartitions) {
    idlePartitions.add(new DefaultPartition<Operator>(operator, loaded.getPartitionKeys(), -1, null));
  }
  result = new ArrayList<>(StatelessPartitioner.repartition(idlePartitions));
  Assert.assertEquals("" + result, 1, result.size());
  for (Partition<?> merged : result) {
    Assert.assertEquals("" + merged.getPartitionKeys(), 1, merged.getPartitionKeys().size());
    PartitionKeys mergedKey = merged.getPartitionKeys().values().iterator().next();
    Assert.assertEquals("" + mergedKey, 0, mergedKey.mask);
    Assert.assertEquals("" + mergedKey, Sets.newHashSet(0), mergedKey.partitions);
  }
}
use of com.datatorrent.api.Partitioner.PartitionKeys in project apex-core by apache.
the class PhysicalPlanTest method testRepartitioningScaleDown.
/**
 * Scale-down scenario on a physical plan built from o1 -> o2 -> o3parallel -> mergeNode, where
 * o2 is partitioned 8 ways via {@code StatelessPartitioner} and o3parallel is attached with
 * {@code PARTITION_PARALLEL}.
 *
 * <p>The test feeds throughput values to the {@code PartitionLoadWatch} registered on o2 and
 * expects: no event for throughput 5 and 3, one event for throughput 2; no change to the
 * partitions while only one of them reports the low value; and a reduction to 4 partitions once
 * all of them do. It then checks partition masks, input wiring, the undeploy/deploy sets
 * recorded by the test context, and the number of storage requests.
 */
@Test
public void testRepartitioningScaleDown() {
  LogicalPlan dag = new LogicalPlan();

  // logical operators
  GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
  GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
  GenericTestOperator o3parallel = dag.addOperator("o3parallel", GenericTestOperator.class);
  OperatorMeta o3Meta = dag.getMeta(o3parallel);
  GenericTestOperator mergeNode = dag.addOperator("mergeNode", GenericTestOperator.class);

  // streams and plan-level settings
  dag.addStream("o1.outport1", o1.outport1, o2.inport1, o2.inport2);
  dag.addStream("o2.outport1", o2.outport1, o3parallel.inport1).setLocality(Locality.CONTAINER_LOCAL);
  dag.setInputPortAttribute(o3parallel.inport1, PortContext.PARTITION_PARALLEL, true);
  dag.addStream("o3parallel_outport1", o3parallel.outport1, mergeNode.inport1);
  dag.getAttributes().put(LogicalPlan.CONTAINERS_MAX_COUNT, 2);

  // o2: load watch plus an 8-way stateless partitioner
  OperatorMeta o2Meta = dag.getMeta(o2);
  StatsListener configuredWatch = new PartitionLoadWatch(3, 5);
  o2Meta.getAttributes().put(OperatorContext.STATS_LISTENERS, Lists.newArrayList(configuredWatch));
  o2Meta.getAttributes().put(OperatorContext.PARTITIONER, new StatelessPartitioner<GenericTestOperator>(8));

  TestPlanContext ctx = new TestPlanContext();
  dag.setAttribute(OperatorContext.STORAGE_AGENT, ctx);
  PhysicalPlan plan = new PhysicalPlan(dag, ctx);

  Assert.assertEquals("number of containers", 2, plan.getContainers().size());
  Assert.assertEquals("Count of storage requests", plan.getAllOperators().size(), ctx.backupRequests);

  List<PTOperator> o2Partitions = plan.getOperators(o2Meta);
  Assert.assertEquals("partition instances " + o2Partitions, 8, o2Partitions.size());
  PTOperator firstPartition = o2Partitions.get(0);

  Collection<PTOperator> o2Unifiers = plan.getMergeOperators(o2Meta);
  Assert.assertEquals("unifiers " + o2Meta, 0, o2Unifiers.size());

  PTOperator mergeInstance = plan.getOperators(dag.getMeta(mergeNode)).get(0);
  Collection<PTOperator> o3unifiers = mergeInstance.upstreamMerge.values();
  Assert.assertEquals("unifiers " + o3Meta, 1, o3unifiers.size());
  PTOperator o3unifier = o3unifiers.iterator().next();
  Assert.assertEquals("unifier inputs " + o3unifier, 8, o3unifier.getInputs().size());

  // everything captured here is expected to show up in the undeploy set after the scale down
  Set<PTOperator> expUndeploy = Sets.newHashSet(plan.getOperators(dag.getMeta(mergeNode)));
  expUndeploy.addAll(o2Partitions);
  expUndeploy.addAll(plan.getOperators(o3Meta));
  expUndeploy.addAll(o3unifiers);

  // verify load update generates expected events per configuration
  Assert.assertEquals("stats handlers " + firstPartition, 1, firstPartition.statsListeners.size());
  StatsListener attachedListener = firstPartition.statsListeners.get(0);
  Assert.assertTrue("stats handlers " + firstPartition.statsListeners, attachedListener instanceof PartitionLoadWatch);
  PartitionLoadWatch attachedWatch = (PartitionLoadWatch) attachedListener;
  // no delay
  attachedWatch.evalIntervalMillis = -1;

  setThroughput(firstPartition, 5);
  plan.onStatusUpdate(firstPartition);
  Assert.assertEquals("load upper bound", 0, ctx.events.size());

  setThroughput(firstPartition, 3);
  plan.onStatusUpdate(firstPartition);
  Assert.assertEquals("load lower bound", 0, ctx.events.size());

  setThroughput(firstPartition, 2);
  plan.onStatusUpdate(firstPartition);
  Assert.assertEquals("load below min", 1, ctx.events.size());

  ctx.backupRequests = 0;
  ctx.events.remove(0).run();

  // expect operators unchanged
  Assert.assertEquals("partitions unchanged", Sets.newHashSet(o2Partitions), Sets.newHashSet(plan.getOperators(o2Meta)));

  // now every partition reports the low throughput
  for (PTOperator partition : o2Partitions) {
    setThroughput(partition, 2);
    plan.onStatusUpdate(partition);
  }
  Assert.assertEquals("load below min", 1, ctx.events.size());
  ctx.events.remove(0).run();

  Assert.assertEquals("partitions merged", 4, plan.getOperators(o2Meta).size());
  Assert.assertEquals("unifier inputs after scale down " + o3unifier, 4, o3unifier.getInputs().size());

  for (PTOperator o3Instance : plan.getOperators(o3Meta)) {
    Assert.assertEquals("outputs " + o3Instance.getOutputs(), 1, o3Instance.getOutputs().size());
  }

  for (PTOperator o2Instance : plan.getOperators(o2Meta)) {
    PartitionKeys keys = o2Instance.getPartitionKeys().values().iterator().next();
    Assert.assertEquals("partition mask " + o2Instance, 3, keys.mask);
    Assert.assertEquals("inputs " + o2Instance, 2, o2Instance.getInputs().size());

    boolean sawInport1 = false;
    for (PTInput in : o2Instance.getInputs()) {
      if (!GenericTestOperator.IPORT1.equals(in.portName)) {
        continue;
      }
      sawInport1 = true;
      Assert.assertEquals("partition mask " + in, keys, in.partitions);
    }
    Assert.assertTrue("connected " + GenericTestOperator.IPORT1, sawInport1);
  }

  Assert.assertEquals("" + ctx.undeploy, expUndeploy, ctx.undeploy);

  // re-read the unifiers from the plan before building the expected deploy set
  mergeInstance = plan.getOperators(dag.getMeta(mergeNode)).get(0);
  o3unifiers = mergeInstance.upstreamMerge.values();
  Set<PTOperator> expDeploy = Sets.newHashSet(plan.getOperators(dag.getMeta(mergeNode)));
  expDeploy.addAll(plan.getOperators(o2Meta));
  expDeploy.addAll(plan.getOperators(o3Meta));
  expDeploy.addAll(o3unifiers);
  Assert.assertEquals("" + ctx.deploy, expDeploy, ctx.deploy);

  for (PTOperator deployed : ctx.deploy) {
    Assert.assertNotNull("container " + deployed, deployed.getContainer());
  }
  Assert.assertEquals("Count of storage requests", 8, ctx.backupRequests);
}
use of com.datatorrent.api.Partitioner.PartitionKeys in project apex-core by apache.
the class StreamPersistanceTests method testDynamicPartitioning.
/**
 * Checks that the codec wrapper of a persist operator tracks the partition keys of the stream's
 * sink across a dynamic repartition.
 *
 * <p>The sink ("console") starts with 2 partitions via {@code StatelessPartitioner}; the test
 * expects the persist operator to be among the dependents of the sink partitions and the wrapper
 * codec to hold 2 partition keys. After reporting -1 through
 * {@code PartitioningTest.PartitionLoadWatch} for every sink partition and processing the
 * resulting events, it expects the wrapper codec to hold exactly 1 partition key for a single
 * input port.
 *
 * @throws ClassNotFoundException declared by the test signature
 * @throws IOException declared by the test signature
 */
@Test
public void testDynamicPartitioning() throws ClassNotFoundException, IOException {
  AscendingNumbersOperator ascend = dag.addOperator("ascend", new AscendingNumbersOperator());

  final TestReceiverOperator console = dag.addOperator("console", new TestReceiverOperator());
  dag.setOperatorAttribute(console, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<TestReceiverOperator>(2));
  dag.setOperatorAttribute(console, Context.OperatorContext.STATS_LISTENERS, Lists.newArrayList((StatsListener) new PartitioningTest.PartitionLoadWatch()));

  final PartitionedTestPersistanceOperator console1 = new PartitionedTestPersistanceOperator();
  StreamMeta s = dag.addStream("Stream1", ascend.outputPort, console.inport);
  dag.setInputPortAttribute(console.inport, PortContext.STREAM_CODEC, new TestPartitionCodec());
  s.persistUsing("persister", console1, console1.inport);

  dag.setAttribute(LogicalPlan.CONTAINERS_MAX_COUNT, Integer.MAX_VALUE);
  StramTestSupport.MemoryStorageAgent msa = new StramTestSupport.MemoryStorageAgent();
  dag.setAttribute(Context.OperatorContext.STORAGE_AGENT, msa);

  StreamingContainerManager dnm = new StreamingContainerManager(dag);
  PhysicalPlan plan = dnm.getPhysicalPlan();

  List<PTContainer> containers = plan.getContainers();
  Assert.assertEquals("number containers", 4, containers.size());
  for (int i = 0; i < containers.size(); ++i) {
    StreamingContainerManagerTest.assignContainer(dnm, "container" + (i + 1));
  }

  LogicalPlan.OperatorMeta passThruMeta = dag.getMeta(console);
  List<PTOperator> ptos = plan.getOperators(passThruMeta);

  // mark every operator active and remember the physical operator named "persister"
  PTOperator persistOperator = null;
  for (PTContainer container : plan.getContainers()) {
    for (PTOperator operator : container.getOperators()) {
      operator.setState(PTOperator.State.ACTIVE);
      if (operator.getName().equals("persister")) {
        persistOperator = operator;
      }
    }
  }

  // Check that persist operator is part of dependents redeployed
  Set<PTOperator> operators = plan.getDependents(ptos);
  logger.debug("Operators to be re-deployed = {}", operators);
  // Validate that persist operator is part of dependents
  assertTrue("persist operator should be part of the operators to be redeployed", operators.contains(persistOperator));

  LogicalPlan.StreamMeta s1 = (LogicalPlan.StreamMeta) s;
  StreamCodec codec = s1.getPersistOperatorInputPort().getStreamCodec();
  // boolean condition: assertTrue rather than assertEquals against a literal true
  assertTrue("Codec should be instance of StreamCodecWrapperForPersistance", codec instanceof StreamCodecWrapperForPersistance);
  StreamCodecWrapperForPersistance wrapperCodec = (StreamCodecWrapperForPersistance) codec;

  Entry<InputPortMeta, Collection<PartitionKeys>> keys = (Entry<InputPortMeta, Collection<PartitionKeys>>) wrapperCodec.inputPortToPartitionMap.entrySet().iterator().next();
  logger.debug(keys.toString());
  assertEquals("Size of partitions should be 2", 2, keys.getValue().size());

  // report -1 for every sink partition, then let the manager process the queued events
  for (PTOperator ptOperator : ptos) {
    PartitioningTest.PartitionLoadWatch.put(ptOperator, -1);
    plan.onStatusUpdate(ptOperator);
  }
  dnm.processEvents();

  // expected value first, actual second, so a failure reports them the right way round
  assertEquals("Input port map", 1, wrapperCodec.inputPortToPartitionMap.size());
  keys = (Entry<InputPortMeta, Collection<PartitionKeys>>) wrapperCodec.inputPortToPartitionMap.entrySet().iterator().next();
  assertEquals("Size of partitions should be 1 after repartition", 1, keys.getValue().size());
  logger.debug(keys.toString());
}
use of com.datatorrent.api.Partitioner.PartitionKeys in project apex-core by apache.
the class PhysicalPlan method updatePersistOperatorWithSinkPartitions.
/**
 * Collects the partition keys that each physical instance of the sink's logical operator holds
 * for {@code sinkPortMeta} and stores the collection in the persist codec's
 * {@code inputPortToPartitionMap} under that port.
 *
 * @param persistInputPort input port of the persist operator (not read by this method)
 * @param persistOperatorMeta logical persist operator (not read by this method)
 * @param persistCodec codec wrapper whose {@code inputPortToPartitionMap} is updated
 * @param sinkPortMeta sink input port whose partition keys are gathered
 */
private void updatePersistOperatorWithSinkPartitions(InputPortMeta persistInputPort, OperatorMeta persistOperatorMeta, StreamCodecWrapperForPersistance<?> persistCodec, InputPortMeta sinkPortMeta) {
  final Collection<PartitionKeys> sinkKeys = new ArrayList<>();
  for (PTOperator sinkInstance : getOperators(sinkPortMeta.getOperatorMeta())) {
    // An instance without an entry for this port contributes null to the collection.
    // NOTE(review): partitionKeys itself is dereferenced without a null check - confirm it is
    // always set for sink instances that reach this method.
    sinkKeys.add(sinkInstance.partitionKeys.get(sinkPortMeta));
  }
  persistCodec.inputPortToPartitionMap.put(sinkPortMeta, sinkKeys);
}
use of com.datatorrent.api.Partitioner.PartitionKeys in project apex-core by apache.
the class StreamMapping method redoMapping.
/**
* Rebuilds the unifier tree between this stream's upstream outputs and the physical instances
* of its downstream (sink) operators, which may cause more changes to the execution layer than
* need be.
* <p>
* Sinks that are parallel partitioned or that have no mapping in the plan are ignored; if no
* downstream instance remains, the method does nothing. Otherwise existing cascading/final
* unifiers are detached, cascading unifiers are set up again when the upstream count exceeds the
* source port's UNIFIER_LIMIT, and every downstream instance is wired either to a shared final
* unifier, to its own (MxN) unifier, or directly to the single upstream source.
* <p>
* TODO: investigate incremental logic
*/
private void redoMapping() {
// (physical downstream operator, logical sink port) pairs that need an input from this stream
Set<Pair<PTOperator, InputPortMeta>> downstreamOpers = Sets.newHashSet();
// figure out the downstream consumers
for (InputPortMeta ipm : streamMeta.getSinks()) {
// skipped for parallel partitions - those are handled elsewhere
if (!ipm.getValue(PortContext.PARTITION_PARALLEL) && plan.hasMapping(ipm.getOperatorMeta())) {
List<PTOperator> partitions = plan.getOperators(ipm.getOperatorMeta());
for (PTOperator doper : partitions) {
downstreamOpers.add(new Pair<>(doper, ipm));
}
}
}
if (!downstreamOpers.isEmpty()) {
// unifiers are required
// start from a clean slate: detach every existing cascading unifier and the final unifier
for (PTOperator unifier : this.cascadingUnifiers) {
detachUnifier(unifier);
}
if (this.finalUnifier != null) {
detachUnifier(finalUnifier);
}
// snapshot the previous cascading unifiers, reset the field and schedule the old ones for undeploy
List<PTOperator> currentUnifiers = Lists.newArrayList(this.cascadingUnifiers);
this.cascadingUnifiers.clear();
plan.undeployOpers.addAll(currentUnifiers);
addSlidingUnifiers();
int limit = streamMeta.getSource().getValue(PortContext.UNIFIER_LIMIT);
// separateUnifiers becomes true as soon as two sinks report different stream codec identifiers
boolean separateUnifiers = false;
Integer lastId = null;
for (InputPortMeta ipm : streamMeta.getSinks()) {
Integer id = plan.getStreamCodecIdentifier(ipm.getStreamCodec());
if (lastId == null) {
lastId = id;
} else if (!id.equals(lastId)) {
separateUnifiers = true;
break;
}
}
// by default unifiers read straight from the upstream outputs
List<PTOutput> unifierSources = this.upstream;
// per stream codec: outputs produced by the cascading unifiers set up for that codec
Map<StreamCodec<?>, List<PTOutput>> cascadeUnifierSourcesMap = Maps.newHashMap();
if (limit > 1 && this.upstream.size() > limit) {
// cascading unifier
if (!separateUnifiers) {
unifierSources = setupCascadingUnifiers(this.upstream, currentUnifiers, limit, 0);
} else {
// one cascade per distinct stream codec
// NOTE(review): unifierSources is overwritten on each new codec, so after this loop it holds
// the sources of the last cascade built; it is only used below as a fallback when the map has
// no entry for a sink's codec - confirm that fallback is intended.
for (InputPortMeta ipm : streamMeta.getSinks()) {
StreamCodec<?> streamCodec = ipm.getStreamCodec();
if (!cascadeUnifierSourcesMap.containsKey(streamCodec)) {
unifierSources = setupCascadingUnifiers(this.upstream, currentUnifiers, limit, 0);
cascadeUnifierSourcesMap.put(streamCodec, unifierSources);
}
}
}
}
// remove remaining unifiers
// (presumably setupCascadingUnifiers takes the ones it reuses out of currentUnifiers - verify)
for (PTOperator oper : currentUnifiers) {
plan.removePTOperator(oper);
}
// Directly getting attribute from map to know if it is set or not as it can be overriden by the input
Boolean sourceSingleFinal = streamMeta.getSource().getAttributes().get(PortContext.UNIFIER_SINGLE_FINAL);
// link the downstream operators with the unifiers
for (Pair<PTOperator, InputPortMeta> doperEntry : downstreamOpers) {
Map<LogicalPlan.InputPortMeta, PartitionKeys> partKeys = doperEntry.first.partitionKeys;
// partition keys of this downstream instance for this sink port; null when none are set
PartitionKeys pks = partKeys != null ? partKeys.get(doperEntry.second) : null;
Boolean sinkSingleFinal = doperEntry.second.getAttributes().get(PortContext.UNIFIER_SINGLE_FINAL);
// precedence: value set on the sink port, else value set on the source port, else the attribute default
boolean lastSingle = (sinkSingleFinal != null) ? sinkSingleFinal : (sourceSingleFinal != null ? sourceSingleFinal.booleanValue() : PortContext.UNIFIER_SINGLE_FINAL.defaultValue);
if (upstream.size() > 1) {
// detach downstream from upstream operator for the case where no unifier existed previously
for (PTOutput source : upstream) {
Iterator<PTInput> sinks = source.sinks.iterator();
while (sinks.hasNext()) {
PTInput sink = sinks.next();
if (sink.target == doperEntry.first) {
doperEntry.first.inputs.remove(sink);
// remove through the iterator so the sinks collection can be modified while iterating
sinks.remove();
}
}
}
if (!separateUnifiers && lastSingle) {
// single final unifier shared by all downstream instances, created on first use
if (finalUnifier == null) {
finalUnifier = createUnifier(streamMeta, plan);
}
// keys with mask 0 are passed on as null, same as absent keys
setInput(doperEntry.first, doperEntry.second, finalUnifier, (pks == null) || (pks.mask == 0) ? null : pks);
if (finalUnifier.inputs.isEmpty()) {
// set unifier inputs once, regardless how many downstream operators there are
for (PTOutput out : unifierSources) {
addInput(this.finalUnifier, out, null);
}
}
} else {
// MxN partitioning: unifier per downstream partition
LOG.debug("MxN unifier for {} {} {}", new Object[] { doperEntry.first, doperEntry.second.getPortName(), pks });
PTOperator unifier = doperEntry.first.upstreamMerge.get(doperEntry.second);
if (unifier == null) {
// first time for this port: create the unifier and connect the downstream instance to it
unifier = createUnifier(streamMeta, plan);
doperEntry.first.upstreamMerge.put(doperEntry.second, unifier);
setInput(doperEntry.first, doperEntry.second, unifier, null);
}
// sources may change dynamically, rebuild inputs (as for cascading unifiers)
for (PTInput in : unifier.inputs) {
in.source.sinks.remove(in);
}
unifier.inputs.clear();
// prefer the cascade built for this sink's stream codec when unifiers are separated per codec
List<PTOutput> doperUnifierSources = unifierSources;
if (separateUnifiers) {
List<PTOutput> cascadeSources = cascadeUnifierSourcesMap.get(doperEntry.second.getStreamCodec());
if (cascadeSources != null) {
doperUnifierSources = cascadeSources;
}
}
// add new inputs
for (PTOutput out : doperUnifierSources) {
addInput(unifier, out, (pks == null) || (pks.mask == 0) ? null : pks);
}
}
} else {
// no partitioning
// at most one upstream output: drop any per-port unifier and connect directly to the source
PTOperator unifier = doperEntry.first.upstreamMerge.remove(doperEntry.second);
if (unifier != null) {
plan.removePTOperator(unifier);
}
setInput(doperEntry.first, doperEntry.second, upstream.get(0).source, pks);
}
}
// Remove a final unifier that ended up without any inputs after the wiring above.
// One situation named by the original comment (it is numbered "2)"; item 1 is not present here):
// 2) Downstream operators partitions are scaled up from one to multiple. (replaced by merged unifier)
if (finalUnifier != null && finalUnifier.inputs.isEmpty()) {
plan.removePTOperator(finalUnifier);
finalUnifier = null;
}
}
}
Aggregations