Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.
The class StreamingContainerManager, method updateRecoveryCheckpoints.
/**
 * Compute the checkpoints required for a given operator instance to be recovered.
 * This is done by looking at the checkpoints available for downstream dependencies first,
 * and then selecting the most recent available checkpoint that is smaller than downstream.
 *
 * @param operator Operator instance for which to find the recovery checkpoint
 * @param ctx      Context into which to collect traversal info
 * @param recovery Whether this update is performed as part of recovery
 */
public void updateRecoveryCheckpoints(PTOperator operator, UpdateCheckpointsContext ctx, boolean recovery)
{
  if (operator.getRecoveryCheckpoint().windowId < ctx.committedWindowId.longValue()) {
    ctx.committedWindowId.setValue(operator.getRecoveryCheckpoint().windowId);
  }
  if (operator.getState() == PTOperator.State.ACTIVE && (ctx.currentTms - operator.stats.lastWindowIdChangeTms) > operator.stats.windowProcessingTimeoutMillis) {
    // if the checkpoint is ahead, the operator is not blocked but waiting for activation (stateless recovery, at-most-once)
    if (ctx.committedWindowId.longValue() >= operator.getRecoveryCheckpoint().windowId && !recovery) {
      LOG.warn("Marking operator {} blocked: committed window {}, recovery window {}, current time {}, last window id change time {}, window processing timeout millis {}", operator, Codec.getStringWindowId(ctx.committedWindowId.longValue()), Codec.getStringWindowId(operator.getRecoveryCheckpoint().windowId), ctx.currentTms, operator.stats.lastWindowIdChangeTms, operator.stats.windowProcessingTimeoutMillis);
      ctx.blocked.add(operator);
    }
  }

  // the most recent checkpoint eligible for recovery based on downstream state
  Checkpoint maxCheckpoint = Checkpoint.INITIAL_CHECKPOINT;

  Set<OperatorMeta> checkpointGroup = ctx.checkpointGroups.get(operator.getOperatorMeta());
  if (checkpointGroup == null) {
    checkpointGroup = Collections.singleton(operator.getOperatorMeta());
  }
  // find the intersection of checkpoints that the group can collectively move to
  TreeSet<Checkpoint> commonCheckpoints = new TreeSet<>(new Checkpoint.CheckpointComparator());
  synchronized (operator.checkpoints) {
    commonCheckpoints.addAll(operator.checkpoints);
  }
  Set<PTOperator> groupOpers = new HashSet<>(checkpointGroup.size());
  boolean pendingDeploy = operator.getState() == PTOperator.State.PENDING_DEPLOY;
  if (checkpointGroup.size() > 1) {
    for (OperatorMeta om : checkpointGroup) {
      Collection<PTOperator> operators = plan.getAllOperators(om);
      Collection<PTOperator> unifiers = getUnifiersInCheckpointGroup(operators);
      operators.addAll(unifiers);
      for (PTOperator groupOper : operators) {
        synchronized (groupOper.checkpoints) {
          commonCheckpoints.retainAll(groupOper.checkpoints);
        }
        // visit all downstream operators of the group
        ctx.visited.add(groupOper);
        groupOpers.add(groupOper);
        pendingDeploy |= groupOper.getState() == PTOperator.State.PENDING_DEPLOY;
      }
    }
    // highest common checkpoint
    if (!commonCheckpoints.isEmpty()) {
      maxCheckpoint = commonCheckpoints.last();
    }
  } else {
    // without logical grouping, treat partitions as independent
    // this is especially important for parallel partitioning
    ctx.visited.add(operator);
    groupOpers.add(operator);
    maxCheckpoint = operator.getRecentCheckpoint();
    if (ctx.recovery && maxCheckpoint.windowId == Stateless.WINDOW_ID && operator.isOperatorStateLess()) {
      long currentWindowId = WindowGenerator.getWindowId(ctx.currentTms, this.vars.windowStartMillis, this.getLogicalPlan().getValue(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS));
      maxCheckpoint = new Checkpoint(currentWindowId, 0, 0);
    }
  }

  // DFS through downstream operators
  for (PTOperator groupOper : groupOpers) {
    for (PTOperator.PTOutput out : groupOper.getOutputs()) {
      for (PTOperator.PTInput sink : out.sinks) {
        PTOperator sinkOperator = sink.target;
        if (groupOpers.contains(sinkOperator)) {
          // downstream operator within the same group
          continue;
        }
        if (!ctx.visited.contains(sinkOperator)) {
          // downstream traversal
          updateRecoveryCheckpoints(sinkOperator, ctx, recovery);
        }
        // guard for dynamically added operators whose checkpoint may lag this operator's
        if (sinkOperator.getRecoveryCheckpoint().windowId >= operator.getRecoveryCheckpoint().windowId) {
          maxCheckpoint = Checkpoint.min(maxCheckpoint, sinkOperator.getRecoveryCheckpoint());
        }
        if (ctx.blocked.contains(sinkOperator)) {
          if (sinkOperator.stats.getCurrentWindowId() == operator.stats.getCurrentWindowId()) {
            // downstream operator is blocked by this operator
            ctx.blocked.remove(sinkOperator);
          }
        }
      }
    }
  }

  // find the common checkpoint that is <= the downstream recovery checkpoint
  if (!commonCheckpoints.contains(maxCheckpoint)) {
    if (!commonCheckpoints.isEmpty()) {
      maxCheckpoint = Objects.firstNonNull(commonCheckpoints.floor(maxCheckpoint), maxCheckpoint);
    }
  }

  for (PTOperator groupOper : groupOpers) {
    // checkpoint is frozen during deployment
    if (!pendingDeploy || ctx.recovery) {
      // remove previous checkpoints
      Checkpoint c1 = Checkpoint.INITIAL_CHECKPOINT;
      LinkedList<Checkpoint> checkpoints = groupOper.checkpoints;
      synchronized (checkpoints) {
        if (!checkpoints.isEmpty() && (checkpoints.getFirst()).windowId <= maxCheckpoint.windowId) {
          c1 = checkpoints.getFirst();
          Checkpoint c2;
          while (checkpoints.size() > 1 && ((c2 = checkpoints.get(1)).windowId) <= maxCheckpoint.windowId) {
            checkpoints.removeFirst();
            //LOG.debug("Checkpoint to delete: operator={} windowId={}", operator.getName(), c1);
            this.purgeCheckpoints.add(new Pair<>(groupOper, c1.windowId));
            c1 = c2;
          }
        } else {
          if (ctx.recovery && checkpoints.isEmpty() && groupOper.isOperatorStateLess()) {
            LOG.debug("Adding checkpoint for stateless operator {} {}", groupOper, Codec.getStringWindowId(maxCheckpoint.windowId));
            c1 = groupOper.addCheckpoint(maxCheckpoint.windowId, this.vars.windowStartMillis);
          }
        }
      }
      //LOG.debug("Operator {} checkpoints: commit {} recent {}", new Object[] {operator.getName(), c1, operator.checkpoints});
      groupOper.setRecoveryCheckpoint(c1);
    } else {
      LOG.debug("Skipping checkpoint update {} during {}", groupOper, groupOper.getState());
    }
  }
}
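The method above is the recursive step; it is driven from the roots of the physical plan so that each operator's recovery checkpoint is derived from its downstream subtree. A minimal sketch of such a driver follows. The UpdateCheckpointsContext constructor arguments and the getCheckpointGroups()/purgeCheckpoints() helpers are assumptions inferred from how the context and the purgeCheckpoints list are used above, not verbatim apex-core API.

// Hypothetical driver, a minimal sketch: seed the DFS at the root operators
// and let updateRecoveryCheckpoints() recurse into downstream dependencies.
private long updateCheckpoints(boolean recovery)
{
  // assumed constructor shape; the context collects visited/blocked operators
  UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(clock, recovery, getCheckpointGroups());
  for (OperatorMeta logicalOperator : plan.getLogicalPlan().getRootOperators()) {
    for (PTOperator operator : plan.getOperators(logicalOperator)) {
      if (!ctx.visited.contains(operator)) {
        updateRecoveryCheckpoints(operator, ctx, recovery);
      }
    }
  }
  // checkpoints queued in this.purgeCheckpoints during traversal are deleted here
  purgeCheckpoints();
  return ctx.committedWindowId.longValue();
}

Seeding at the roots matters: the recursion reads each sink's recovery checkpoint after updating it, so starting mid-graph could consume stale downstream state.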
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.
The class StreamingContainerManager, method removeContainerAgent.
public void removeContainerAgent(String containerId)
{
  LOG.debug("Removing container agent {}", containerId);
  StreamingContainerAgent containerAgent = containers.remove(containerId);
  if (containerAgent != null) {
    // record operator stop for this container
    for (PTOperator oper : containerAgent.container.getOperators()) {
      StramEvent ev = new StramEvent.StopOperatorEvent(oper.getName(), oper.getId(), containerId);
      recordEventAsync(ev);
    }
    containerAgent.container.setFinishedTime(System.currentTimeMillis());
    containerAgent.container.setState(PTContainer.State.KILLED);
    completedContainers.put(containerId, containerAgent.getContainerInfo());
  }
}
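A caller would invoke this once the cluster reports the container as finished, moving bookkeeping from the live container map to the completed map. A minimal sketch, assuming a YARN completion callback; the onContainerCompleted name and the dnmgr field are hypothetical:

// Hypothetical completion hook: when YARN reports a finished container,
// drop its agent, record stop events, and archive its ContainerInfo.
public void onContainerCompleted(ContainerStatus status)
{
  String containerId = status.getContainerId().toString();
  // safe to call repeatedly; the null check above guards double removal
  dnmgr.removeContainerAgent(containerId);
}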
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.
The class LogicalPlanConfigurationTest, method testModuleUnifierLevelAttributes.
@Test
@SuppressWarnings({"UnnecessaryBoxing", "AssertEqualsBetweenInconvertibleTypes"})
public void testModuleUnifierLevelAttributes()
{
  class DummyOperator extends BaseOperator
  {
    int prop;

    public transient DefaultInputPort<Integer> input = new DefaultInputPort<Integer>()
    {
      @Override
      public void process(Integer tuple)
      {
        LOG.debug(tuple.intValue() + " processed");
        output.emit(tuple);
      }
    };

    public transient DefaultOutputPort<Integer> output = new DefaultOutputPort<>();
  }

  class DummyOutputOperator extends BaseOperator
  {
    int prop;

    public transient DefaultInputPort<Integer> input = new DefaultInputPort<Integer>()
    {
      @Override
      public void process(Integer tuple)
      {
        LOG.debug(tuple.intValue() + " processed");
      }
    };
  }

  class TestUnifierAttributeModule implements Module
  {
    public transient ProxyInputPort<Integer> moduleInput = new ProxyInputPort<>();
    public transient ProxyOutputPort<Integer> moduleOutput = new Module.ProxyOutputPort<>();

    @Override
    public void populateDAG(DAG dag, Configuration conf)
    {
      DummyOperator dummyOperator = dag.addOperator("DummyOperator", new DummyOperator());
      dag.setOperatorAttribute(dummyOperator, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<DummyOperator>(3));
      dag.setUnifierAttribute(dummyOperator.output, OperatorContext.TIMEOUT_WINDOW_COUNT, 2);
      moduleInput.set(dummyOperator.input);
      moduleOutput.set(dummyOperator.output);
    }
  }

  StreamingApplication app = new StreamingApplication()
  {
    @Override
    public void populateDAG(DAG dag, Configuration conf)
    {
      Module m1 = dag.addModule("TestModule", new TestUnifierAttributeModule());
      DummyOutputOperator dummyOutputOperator = dag.addOperator("DummyOutputOperator", new DummyOutputOperator());
      dag.addStream("Module To Operator", ((TestUnifierAttributeModule)m1).moduleOutput, dummyOutputOperator.input);
    }
  };

  String appName = "UnifierApp";
  LogicalPlanConfiguration dagBuilder = new LogicalPlanConfiguration(new Configuration(false));
  LogicalPlan dag = new LogicalPlan();
  dag.setAttribute(Context.OperatorContext.STORAGE_AGENT, new MockStorageAgent());
  dagBuilder.prepareDAG(dag, app, appName);

  LogicalPlan.OperatorMeta ometa = dag.getOperatorMeta("TestModule$DummyOperator");
  LogicalPlan.OperatorMeta om = null;
  for (Map.Entry<LogicalPlan.OutputPortMeta, LogicalPlan.StreamMeta> entry : ometa.getOutputStreams().entrySet()) {
    if (entry.getKey().getPortName().equals("output")) {
      om = entry.getKey().getUnifierMeta();
    }
  }

  /*
   * Verify the attribute value after preparing the DAG.
   */
  Assert.assertNotNull(om);
  Assert.assertEquals("", Integer.valueOf(2), om.getValue(Context.OperatorContext.TIMEOUT_WINDOW_COUNT));

  PhysicalPlan plan = new PhysicalPlan(dag, new TestPlanContext());
  List<PTContainer> containers = plan.getContainers();
  LogicalPlan.OperatorMeta operatorMeta = null;
  for (PTContainer container : containers) {
    List<PTOperator> operators = container.getOperators();
    for (PTOperator operator : operators) {
      if (operator.isUnifier()) {
        operatorMeta = operator.getOperatorMeta();
      }
    }
  }

  /*
   * Verify the attribute after physical plan creation with partitioned operators.
   */
  Assert.assertEquals("", Integer.valueOf(2), operatorMeta.getValue(OperatorContext.TIMEOUT_WINDOW_COUNT));
}
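Stripped of the test scaffolding, the API under test is a pair of calls inside the module's populateDAG(): partition the operator, then attach an attribute to the unifier of its output port. A condensed sketch, using the same names and values as the test above:

// Partition DummyOperator three ways; the engine inserts a unifier to merge
// the partitions' output, and the attribute below applies to that unifier.
DummyOperator dummyOperator = dag.addOperator("DummyOperator", new DummyOperator());
dag.setOperatorAttribute(dummyOperator, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<DummyOperator>(3));
dag.setUnifierAttribute(dummyOperator.output, OperatorContext.TIMEOUT_WINDOW_COUNT, 2);

The two assertions then confirm the value both on the logical unifier meta after prepareDAG() and on the physical unifier instance after PhysicalPlan construction, i.e. the attribute survives both module flattening and partitioning.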
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.
The class StreamPersistanceTests, method testDynamicPartitioning.
@Test
public void testDynamicPartitioning() throws ClassNotFoundException, IOException
{
  AscendingNumbersOperator ascend = dag.addOperator("ascend", new AscendingNumbersOperator());
  final TestReceiverOperator console = dag.addOperator("console", new TestReceiverOperator());
  dag.setOperatorAttribute(console, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<TestReceiverOperator>(2));
  dag.setOperatorAttribute(console, Context.OperatorContext.STATS_LISTENERS, Lists.newArrayList((StatsListener)new PartitioningTest.PartitionLoadWatch()));

  final PartitionedTestPersistanceOperator console1 = new PartitionedTestPersistanceOperator();
  StreamMeta s = dag.addStream("Stream1", ascend.outputPort, console.inport);
  dag.setInputPortAttribute(console.inport, PortContext.STREAM_CODEC, new TestPartitionCodec());
  s.persistUsing("persister", console1, console1.inport);

  dag.setAttribute(LogicalPlan.CONTAINERS_MAX_COUNT, Integer.MAX_VALUE);
  StramTestSupport.MemoryStorageAgent msa = new StramTestSupport.MemoryStorageAgent();
  dag.setAttribute(Context.OperatorContext.STORAGE_AGENT, msa);

  StreamingContainerManager dnm = new StreamingContainerManager(dag);
  PhysicalPlan plan = dnm.getPhysicalPlan();
  List<PTContainer> containers = plan.getContainers();
  Assert.assertEquals("number containers", 4, containers.size());
  for (int i = 0; i < containers.size(); ++i) {
    StreamingContainerManagerTest.assignContainer(dnm, "container" + (i + 1));
  }

  LogicalPlan.OperatorMeta passThruMeta = dag.getMeta(console);
  List<PTOperator> ptos = plan.getOperators(passThruMeta);

  PTOperator persistOperatorContainer = null;
  for (PTContainer container : plan.getContainers()) {
    for (PTOperator operator : container.getOperators()) {
      operator.setState(PTOperator.State.ACTIVE);
      if (operator.getName().equals("persister")) {
        persistOperatorContainer = operator;
      }
    }
  }

  // validate that the persist operator is among the dependents to be redeployed
  Set<PTOperator> operators = plan.getDependents(ptos);
  logger.debug("Operators to be re-deployed = {}", operators);
  assertTrue("persist operator should be part of the operators to be redeployed", operators.contains(persistOperatorContainer));

  LogicalPlan.StreamMeta s1 = (LogicalPlan.StreamMeta)s;
  StreamCodec codec = s1.getPersistOperatorInputPort().getStreamCodec();
  assertTrue("codec should be an instance of StreamCodecWrapperForPersistance", codec instanceof StreamCodecWrapperForPersistance);
  StreamCodecWrapperForPersistance wrapperCodec = (StreamCodecWrapperForPersistance)codec;

  Entry<InputPortMeta, Collection<PartitionKeys>> keys = (Entry<InputPortMeta, Collection<PartitionKeys>>)wrapperCodec.inputPortToPartitionMap.entrySet().iterator().next();
  logger.debug(keys.toString());
  assertEquals("Size of partitions should be 2", 2, keys.getValue().size());

  // report negative load to force a repartition of the console operator
  for (PTOperator ptOperator : ptos) {
    PartitioningTest.PartitionLoadWatch.put(ptOperator, -1);
    plan.onStatusUpdate(ptOperator);
  }
  dnm.processEvents();

  assertEquals("Input port map", 1, wrapperCodec.inputPortToPartitionMap.size());
  keys = (Entry<InputPortMeta, Collection<PartitionKeys>>)wrapperCodec.inputPortToPartitionMap.entrySet().iterator().next();
  assertEquals("Size of partitions should be 1 after repartition", 1, keys.getValue().size());
  logger.debug(keys.toString());
}
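The pivotal call in this test is StreamMeta.persistUsing(), which attaches a persist operator to an existing stream. A condensed sketch of the wiring, using the same names as the test above:

// Every tuple flowing on Stream1 from ascend.outputPort to console.inport
// is also delivered to the persist operator console1 via its input port.
StreamMeta s = dag.addStream("Stream1", ascend.outputPort, console.inport);
s.persistUsing("persister", console1, console1.inport);

Because the persist operator must track which partitions of the consumer exist, the engine wraps its stream codec in StreamCodecWrapperForPersistance; the assertions before and after processEvents() verify that inputPortToPartitionMap shrinks from two partition keys to one when PartitionLoadWatch forces a scale-down.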
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.
The class LocalityTest, method testNodeLocal.
@Test
public void testNodeLocal()
{
  LogicalPlan dag = new LogicalPlan();
  dag.getAttributes().put(com.datatorrent.api.Context.DAGContext.APPLICATION_PATH, new File("target", LocalityTest.class.getName()).getAbsolutePath());
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

  GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
  GenericTestOperator partitioned = dag.addOperator("partitioned", GenericTestOperator.class);
  dag.getMeta(partitioned).getAttributes().put(OperatorContext.PARTITIONER, new StatelessPartitioner<GenericTestOperator>(2));

  GenericTestOperator partitionedParallel = dag.addOperator("partitionedParallel", GenericTestOperator.class);
  dag.addStream("o1_outport1", o1.outport1, partitioned.inport1).setLocality(null);
  dag.addStream("partitioned_outport1", partitioned.outport1, partitionedParallel.inport2).setLocality(Locality.NODE_LOCAL);
  dag.setInputPortAttribute(partitionedParallel.inport2, PortContext.PARTITION_PARALLEL, true);

  GenericTestOperator single = dag.addOperator("single", GenericTestOperator.class);
  dag.addStream("partitionedParallel_outport1", partitionedParallel.outport1, single.inport1);

  int maxContainers = 7;
  dag.setAttribute(LogicalPlan.CONTAINERS_MAX_COUNT, maxContainers);

  StreamingContainerManager scm = new StreamingContainerManager(dag);
  Assert.assertEquals("number required containers", 6, scm.containerStartRequests.size());

  ResourceRequestHandler rr = new ResourceRequestHandler();
  int containerMem = 2000;
  Map<String, NodeReport> nodeReports = Maps.newHashMap();
  NodeReport nr = BuilderUtils.newNodeReport(BuilderUtils.newNodeId("host1", 0), NodeState.RUNNING, "httpAddress", "rackName", BuilderUtils.newResource(0, 0), BuilderUtils.newResource(containerMem * 2, 2), 0, null, 0);
  nodeReports.put(nr.getNodeId().getHost(), nr);
  nr = BuilderUtils.newNodeReport(BuilderUtils.newNodeId("host2", 0), NodeState.RUNNING, "httpAddress", "rackName", BuilderUtils.newResource(0, 0), BuilderUtils.newResource(containerMem * 2, 2), 0, null, 0);
  nodeReports.put(nr.getNodeId().getHost(), nr);

  // set available resources
  rr.updateNodeReports(Lists.newArrayList(nodeReports.values()));
  Map<PTContainer, String> requestedHosts = Maps.newHashMap();
  for (ContainerStartRequest csr : scm.containerStartRequests) {
    String host = rr.getHost(csr, true);
    csr.container.host = host;
    // update the node report
    if (host != null) {
      requestedHosts.put(csr.container, host);
      nr = nodeReports.get(host);
      nr.getUsed().setMemory(nr.getUsed().getMemory() + containerMem);
    }
  }
  Assert.assertEquals("" + requestedHosts, nodeReports.keySet(), Sets.newHashSet(requestedHosts.values()));

  for (Map.Entry<PTContainer, String> e : requestedHosts.entrySet()) {
    for (PTOperator oper : e.getKey().getOperators()) {
      if (oper.getNodeLocalOperators().getOperatorSet().size() > 1) {
        String expHost = null;
        for (PTOperator nodeLocalOper : oper.getNodeLocalOperators().getOperatorSet()) {
          Assert.assertNotNull("host null " + nodeLocalOper.getContainer(), nodeLocalOper.getContainer().host);
          if (expHost == null) {
            expHost = nodeLocalOper.getContainer().host;
          } else {
            Assert.assertEquals("expected same host " + nodeLocalOper, expHost, nodeLocalOper.getContainer().host);
          }
        }
      }
    }
  }
}
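The locality constraint the test exercises boils down to two calls on the DAG. A condensed sketch, using the same names as the test above:

// Ask the planner to place partitionedParallel's parallel partitions on the
// same node as the corresponding partitions of partitioned (containers may differ).
dag.addStream("partitioned_outport1", partitioned.outport1, partitionedParallel.inport2).setLocality(Locality.NODE_LOCAL);
dag.setInputPortAttribute(partitionedParallel.inport2, PortContext.PARTITION_PARALLEL, true);

The final loop asserts the outcome: whenever an operator's node-local set spans more than one physical operator, every member's container must resolve to the same host.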