Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by Apache.
Class StreamingContainerManager, method aggregateMetrics.
/**
 * Aggregates the end-window statistics reported by the physical operators of every logical
 * operator for a single window.
 *
 * <p>Two passes are made over the logical operators: the first feeds the deprecated counters
 * aggregator and stores its result in {@code latestLogicalCounters}; the second feeds the
 * {@link AutoMetric.Aggregator}, appends the result to the per-operator window queue in
 * {@code logicalMetrics} and records it in {@code latestLogicalMetrics}.
 *
 * @param windowId          id of the window the statistics belong to
 * @param endWindowStatsMap end-window statistics keyed by physical operator id
 */
private void aggregateMetrics(long windowId, Map<Integer, EndWindowStats> endWindowStatsMap) {
  Collection<OperatorMeta> logicalOperators = getLogicalPlan().getAllOperators();

  // first pass: deprecated counters aggregation, kept for backward compatibility
  for (OperatorMeta logicalOper : logicalOperators) {
    @SuppressWarnings("deprecation")
    Context.CountersAggregator countersAggregator = logicalOper.getValue(OperatorContext.COUNTERS_AGGREGATOR);
    if (countersAggregator != null) {
      Collection<PTOperator> partitions = plan.getAllOperators(logicalOper);
      List<Object> partitionCounters = Lists.newArrayList();
      for (PTOperator partition : partitions) {
        EndWindowStats partitionStats = endWindowStatsMap.get(partition.getId());
        if (partitionStats == null || partitionStats.counters == null) {
          continue;
        }
        partitionCounters.add(partitionStats.counters);
      }
      if (!partitionCounters.isEmpty()) {
        @SuppressWarnings("deprecation")
        Object aggregated = countersAggregator.aggregate(partitionCounters);
        latestLogicalCounters.put(logicalOper.getName(), aggregated);
      }
    }
  }

  // second pass: auto-metric aggregation
  for (OperatorMeta logicalOper : logicalOperators) {
    AutoMetric.Aggregator metricAggregator = null;
    if (logicalOper.getMetricAggregatorMeta() != null) {
      metricAggregator = logicalOper.getMetricAggregatorMeta().getAggregator();
    }
    if (metricAggregator == null) {
      continue;
    }

    Collection<PTOperator> partitions = plan.getAllOperators(logicalOper);
    List<AutoMetric.PhysicalMetricsContext> metricPool = Lists.newArrayList();
    for (PTOperator partition : partitions) {
      EndWindowStats partitionStats = endWindowStatsMap.get(partition.getId());
      if (partitionStats == null || partitionStats.metrics == null) {
        continue;
      }
      metricPool.add(new PhysicalMetricsContextImpl(partition.getId(), partitionStats.metrics));
    }
    if (metricPool.isEmpty()) {
      // nothing to aggregate
      continue;
    }

    Map<String, Object> aggregatedMetrics = metricAggregator.aggregate(windowId, metricPool);
    if (aggregatedMetrics == null || aggregatedMetrics.isEmpty()) {
      continue;
    }

    String operatorName = logicalOper.getName();
    Queue<Pair<Long, Map<String, Object>>> metricsPerWindow = logicalMetrics.get(operatorName);
    if (metricsPerWindow == null) {
      // bounded queue that drops its head when it is about to run out of capacity
      metricsPerWindow = new LinkedBlockingQueue<Pair<Long, Map<String, Object>>>(METRIC_QUEUE_SIZE) {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean add(Pair<Long, Map<String, Object>> entry) {
          if (remainingCapacity() <= 1) {
            remove();
          }
          return super.add(entry);
        }
      };
      logicalMetrics.put(operatorName, metricsPerWindow);
    }
    LOG.debug("Adding to logical metrics for {}", operatorName);
    metricsPerWindow.add(new Pair<>(windowId, aggregatedMetrics));

    Map<String, Object> previous = latestLogicalMetrics.put(operatorName, aggregatedMetrics);
    if (previous == null) {
      // first metrics seen for this operator: persist the application meta info
      try {
        saveMetaInfo();
      } catch (IOException ex) {
        LOG.error("Cannot save application meta info to DFS. App data sources will not be available.", ex);
      }
    }
  }
}
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by Apache.
Class StreamingContainerManager, method deploy.
/**
 * Applies a physical plan change by updating operator states and requesting or shutting down
 * containers.
 *
 * <p>While the method runs {@code deployChangeInProgress} is set; on exit (normal or
 * exceptional) {@code deployChangeCnt} is incremented and the flag is cleared.
 *
 * @param releaseContainers containers that are no longer used; those with a registered agent are
 *                          asked to shut down with {@code ShutdownType.ABORT}
 * @param undeploy          operators to mark {@code PENDING_UNDEPLOY}, unless their container is
 *                          being started, released or is already {@code KILLED}
 * @param startContainers   containers to request
 * @param deploy            operators to mark {@code PENDING_DEPLOY}, unless they are pending undeploy
 */
@Override
public void deploy(Set<PTContainer> releaseContainers, Collection<PTOperator> undeploy, Set<PTContainer> startContainers, Collection<PTOperator> deploy) {
  try {
    this.deployChangeInProgress.set(true);

    Map<PTContainer, List<PTOperator>> undeployGroups = groupByContainer(undeploy);
    // order does not matter, remove all operators in each container in one sweep
    for (Map.Entry<PTContainer, List<PTOperator>> e : undeployGroups.entrySet()) {
      // container may already be in failed or pending deploy state, notified by RM or timed out
      PTContainer c = e.getKey();
      if (!startContainers.contains(c) && !releaseContainers.contains(c) && c.getState() != PTContainer.State.KILLED) {
        LOG.debug("scheduling undeploy {} {}", c.getExternalId(), e.getValue());
        for (PTOperator oper : e.getValue()) {
          oper.setState(PTOperator.State.PENDING_UNDEPLOY);
        }
      }
    }

    // start new containers
    for (PTContainer c : startContainers) {
      requestContainer(c);
    }

    // (re)deploy affected operators
    // can happen in parallel after buffer server for recovered publishers is reset
    Map<PTContainer, List<PTOperator>> deployGroups = groupByContainer(deploy);
    for (Map.Entry<PTContainer, List<PTOperator>> e : deployGroups.entrySet()) {
      if (!startContainers.contains(e.getKey())) {
        // to reset publishers, clean buffer server past checkpoint so subscribers don't read stale data (including end of stream)
        for (PTOperator operator : e.getValue()) {
          for (PTOperator.PTOutput out : operator.getOutputs()) {
            if (!out.isDownStreamInline()) {
              for (InputPortMeta ipm : out.logicalStream.getSinks()) {
                StreamCodec<?> streamCodec = ipm.getStreamCodec();
                Integer codecId = plan.getStreamCodecIdentifier(streamCodec);
                // following needs to match the concat logic in StreamingContainer
                String sourceIdentifier = Integer.toString(operator.getId()).concat(Component.CONCAT_SEPARATOR).concat(out.portName).concat(Component.CONCAT_SEPARATOR).concat(codecId.toString());
                // TODO: unit test - find way to mock this when testing rest of logic
                if (operator.getContainer().getState() == PTContainer.State.ACTIVE
                    && operator.getContainer().bufferServerAddress.getPort() != 0) {
                  BufferServerController bsc = getBufferServerClient(operator);
                  // ensures new subscriber starting to read from checkpoint will wait until publisher redeploy cycle is complete
                  try {
                    bsc.reset(null, sourceIdentifier, 0);
                  } catch (Exception ex) {
                    // best effort: a failed reset must not abort the deploy; the exception is passed as the
                    // trailing argument without a placeholder of its own so that its stack trace is logged
                    LOG.error("Failed to reset buffer server {}", sourceIdentifier, ex);
                  }
                }
              }
            }
          }
        }
      }
      // add to operators that we expect to deploy
      LOG.debug("scheduling deploy {} {}", e.getKey().getExternalId(), e.getValue());
      for (PTOperator oper : e.getValue()) {
        // operator will be deployed after it has been undeployed, if still referenced by the container
        if (oper.getState() != PTOperator.State.PENDING_UNDEPLOY) {
          oper.setState(PTOperator.State.PENDING_DEPLOY);
        }
      }
    }

    // stop containers that are no longer used
    for (PTContainer c : releaseContainers) {
      if (c.getExternalId() == null) {
        // no external id to look an agent up with
        continue;
      }
      StreamingContainerAgent sca = containers.get(c.getExternalId());
      if (sca != null) {
        LOG.debug("Container marked for shutdown: {}", c);
        // container already removed from plan
        // TODO: monitor soft shutdown
        sca.requestShutDown(ShutdownType.ABORT);
      }
    }
  } finally {
    this.deployChangeCnt++;
    this.deployChangeInProgress.set(false);
  }
}
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by Apache.
Class StreamingContainerManager, method updateOperatorLatency.
/**
 * Computes a latency value for an operator relative to its upstream operators, adds it to the
 * operator's latency moving average and remembers the upstream operator it was derived from in
 * {@code slowestUpstreamOp}.
 *
 * <p>When end-window stats exist for the operator's current window, the per-port latency is the
 * difference between the RPC-adjusted end-window emit timestamps of the operator and the
 * upstream operator; otherwise it is estimated from the distance between their current window
 * ids. Upstream delay operators are skipped. Of all considered ports the smallest latency is kept.
 *
 * @param oper operator whose latency is updated
 * @param ctx  provider of end-window emit timestamps and RPC latencies
 * @return the recorded latency, or -1 if the operator has no inputs, has no positive current
 *         window id, or no upstream operator qualified
 */
public long updateOperatorLatency(PTOperator oper, UpdateOperatorLatencyContext ctx) {
  if (oper.getInputs().isEmpty() || oper.stats.currentWindowId.get() <= 0) {
    return -1;
  }

  OperatorStatus operStatus = oper.stats;
  long selectedLatency = Long.MAX_VALUE;
  PTOperator selectedUpstream = null;
  int windowWidthMillis = plan.getLogicalPlan().getValue(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS);
  int heartbeatTimeoutMillis = plan.getLogicalPlan().getValue(LogicalPlan.HEARTBEAT_TIMEOUT_MILLIS);
  long operWindowId = operStatus.currentWindowId.get();

  if (ctx.endWindowStatsExists(operWindowId)) {
    long operEmitTime = ctx.getEndWindowEmitTimestamp(operWindowId, oper);
    long adjustedOperEmitTime = operEmitTime + ctx.getRPCLatency(oper);
    for (PTInput in : oper.getInputs()) {
      PTOperator upstream = in.source.source;
      if (upstream.getOperatorMeta().getOperator() instanceof Operator.DelayOperator) {
        continue;
      }
      long upstreamEmitTime = ctx.getEndWindowEmitTimestamp(operWindowId, upstream);
      if (upstreamEmitTime < 0) {
        continue;
      }
      // negative differences are clamped to zero
      long portLatency = Math.max(adjustedOperEmitTime - (upstreamEmitTime + ctx.getRPCLatency(upstream)), 0L);
      long windowsBehindLatency = WindowGenerator.compareWindowId(upstream.stats.currentWindowId.get(), oper.stats.currentWindowId.get(), windowWidthMillis) * windowWidthMillis;
      // the window distance only takes over when it exceeds both the timestamp based value and the heartbeat timeout
      if (windowsBehindLatency > portLatency && windowsBehindLatency > heartbeatTimeoutMillis) {
        portLatency = windowsBehindLatency;
      }
      if (portLatency < selectedLatency) {
        selectedLatency = portLatency;
        selectedUpstream = upstream;
      }
    }
  } else {
    // the end window stats for the current window id is not available, estimate latency by looking at upstream window id
    for (PTInput in : oper.getInputs()) {
      PTOperator upstream = in.source.source;
      if (upstream.getOperatorMeta().getOperator() instanceof Operator.DelayOperator) {
        continue;
      }
      if (upstream.stats.currentWindowId.get() < oper.stats.currentWindowId.get()) {
        continue;
      }
      long portLatency = WindowGenerator.compareWindowId(upstream.stats.currentWindowId.get(), oper.stats.currentWindowId.get(), windowWidthMillis) * windowWidthMillis;
      if (portLatency < selectedLatency) {
        selectedLatency = portLatency;
        selectedUpstream = upstream;
      }
    }
  }

  if (selectedUpstream == null) {
    return -1;
  }
  operStatus.latencyMA.add(selectedLatency);
  slowestUpstreamOp.put(oper, selectedUpstream);
  return selectedLatency;
}
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by Apache.
Class StreamingContainerManager, method purgeCheckpoints.
/**
 * Submits one deletion task to {@code poolExecutor} for every queued (operator, window id) pair
 * whose operator is not stateless, then clears the queue.
 *
 * <p>Each task asks the operator's storage agent to delete the checkpoint of that window; an
 * {@link IOException} raised by the deletion is logged and not propagated.
 */
private void purgeCheckpoints() {
  for (Pair<PTOperator, Long> pending : purgeCheckpoints) {
    final PTOperator operator = pending.getFirst();
    if (operator.isOperatorStateLess()) {
      continue;
    }
    final long windowId = pending.getSecond();
    poolExecutor.submit(new Runnable() {
      @Override
      public void run() {
        try {
          operator.getOperatorMeta().getValue(OperatorContext.STORAGE_AGENT).delete(operator.getId(), windowId);
        } catch (IOException ex) {
          LOG.error("Failed to purge checkpoint for operator {} for windowId {}", operator, windowId, ex);
        }
      }
    });
  }
  purgeCheckpoints.clear();
}
Use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by Apache.
Class StreamingContainerManager, method fillLogicalOperatorInfo.
/**
 * Builds the {@link LogicalOperatorInfo} of a logical operator by combining the logical
 * operator's own status with the stats of all its physical operators.
 *
 * <p>Throughput, latency and checkpoint time are taken from partitions only (unifiers are
 * excluded); CPU, heartbeat, window ids, state counts, containers and hosts are taken from every
 * physical operator. Counters and auto metrics are only filled in when a checkpoint time average
 * is available.
 *
 * @param operator logical operator to describe
 * @return the populated info object
 */
private LogicalOperatorInfo fillLogicalOperatorInfo(OperatorMeta operator) {
  LogicalOperatorInfo info = new LogicalOperatorInfo();
  info.name = operator.getName();
  info.className = operator.getOperator().getClass().getName();
  info.totalTuplesEmitted = operator.getStatus().totalTuplesEmitted;
  info.totalTuplesProcessed = operator.getStatus().totalTuplesProcessed;
  info.failureCount = operator.getStatus().failureCount;
  info.status = new HashMap<>();
  info.partitions = new TreeSet<>();
  info.unifiers = new TreeSet<>();
  info.containerIds = new TreeSet<>();
  info.hosts = new TreeSet<>();

  Collection<PTOperator> physicalOperators = getPhysicalPlan().getAllOperators(operator);
  NumberAggregate.LongAggregate checkpointTimeAggregate = new NumberAggregate.LongAggregate();

  for (PTOperator physical : physicalOperators) {
    OperatorStatus physicalStats = physical.stats;

    if (!physical.isUnifier()) {
      info.partitions.add(physical.getId());
      // exclude unifier, not sure if we should include it in the future
      info.tuplesEmittedPSMA += physicalStats.tuplesEmittedPSMA.get();
      info.tuplesProcessedPSMA += physicalStats.tuplesProcessedPSMA.get();
      // the logical latency is the maximum latency over all partitions
      long partitionLatency = calculateLatency(physical);
      if (info.latencyMA < partitionLatency) {
        info.latencyMA = partitionLatency;
      }
      checkpointTimeAggregate.addNumber(physicalStats.checkpointTimeMA.getAvg());
    } else {
      info.unifiers.add(physical.getId());
    }

    info.cpuPercentageMA += physicalStats.cpuNanosPMSMA.getAvg() / 10000;

    // keep the oldest heartbeat timestamp; 0 means "not set yet"
    if (physicalStats.lastHeartbeat != null
        && (info.lastHeartbeat == 0 || info.lastHeartbeat > physicalStats.lastHeartbeat.getGeneratedTms())) {
      info.lastHeartbeat = physicalStats.lastHeartbeat.getGeneratedTms();
    }

    // keep the smallest current window id; 0 means "not set yet"
    long physicalWindowId = toWsWindowId(physicalStats.currentWindowId.get());
    if (info.currentWindowId == 0 || info.currentWindowId > physicalWindowId) {
      info.currentWindowId = physicalWindowId;
    }

    // count physical operators per state
    String stateKey = physical.getState().toString();
    MutableInt stateCount = info.status.get(stateKey);
    if (stateCount == null) {
      stateCount = new MutableInt();
      info.status.put(stateKey, stateCount);
    }
    stateCount.increment();

    // keep the smallest recovery window id; 0 means "not set yet"
    if (physical.getRecoveryCheckpoint() != null) {
      long physicalRecoveryWindowId = toWsWindowId(physical.getRecoveryCheckpoint().windowId);
      if (info.recoveryWindowId == 0 || info.recoveryWindowId > physicalRecoveryWindowId) {
        info.recoveryWindowId = physicalRecoveryWindowId;
      }
    }

    // only containers that have an external id contribute container id and host
    PTContainer container = physical.getContainer();
    String externalId = container == null ? null : container.getExternalId();
    if (externalId != null) {
      info.containerIds.add(externalId);
      info.hosts.add(container.host);
    }
  }

  if (!physicalOperators.isEmpty() && checkpointTimeAggregate.getAvg() != null) {
    info.checkpointTimeMA = checkpointTimeAggregate.getAvg().longValue();
    info.counters = latestLogicalCounters.get(operator.getName());
    info.autoMetrics = latestLogicalMetrics.get(operator.getName());
  }
  return info;
}
Aggregations