use of org.apache.storm.executor.IRunningExecutor in project storm by apache.
the class Worker method loadWorker.
/**
 * Builds the worker's state, starts heartbeating, creates and launches the local executors,
 * and schedules the worker's recurring timers.
 *
 * @param stateStorage state storage handed to the new {@code WorkerState}
 * @param stormClusterState cluster state handed to the new {@code WorkerState}
 * @param initCreds credentials map passed to every executor created here
 * @param initialCredentials credentials handed to the new {@code WorkerState}
 * @return this worker
 * @throws Exception if any step of the start-up sequence fails
 */
private Object loadWorker(IStateStorage stateStorage, IStormClusterState stormClusterState, Map<String, String> initCreds, Credentials initialCredentials) throws Exception {
    workerState = new WorkerState(conf, context, topologyId, assignmentId, supervisorIfaceSupplier, port, workerId, topologyConf, stateStorage, stormClusterState, autoCreds, metricRegistry, initialCredentials);
    this.heatbeatMeter = metricRegistry.meter("doHeartbeat-calls", workerState.getWorkerTopologyContext(), Constants.SYSTEM_COMPONENT_ID, (int) Constants.SYSTEM_TASK_ID);

    // Heartbeat here so that worker process dies if this fails
    // it's important that worker heartbeat to supervisor ASAP so that supervisor knows
    // that worker is running and moves on
    doHeartBeat();
    executorsAtom = new AtomicReference<>(null);

    // launch heartbeat threads immediately so that slow-loading tasks don't cause the worker to timeout
    // to the supervisor
    workerState.heartbeatTimer.scheduleRecurring(0, (Integer) conf.get(Config.WORKER_HEARTBEAT_FREQUENCY_SECS), () -> {
        try {
            doHeartBeat();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
    Integer execHeartBeatFreqSecs = workerState.stormClusterState.isPacemakerStateStore() ? (Integer) conf.get(Config.TASK_HEARTBEAT_FREQUENCY_SECS) : (Integer) conf.get(Config.EXECUTOR_METRICS_FREQUENCY_SECS);
    workerState.executorHeartbeatTimer.scheduleRecurring(0, execHeartBeatFreqSecs, Worker.this::doExecutorHeartbeats);

    workerState.refreshConnections();
    workerState.activateWorkerWhenAllConnectionsReady();
    workerState.refreshStormActive(null);
    workerState.runWorkerStartHooks();

    // Create one executor per local assignment and register its receive queue under each of its task ids.
    // The only difference between local and distributed mode is the factory used.
    final boolean localMode = ConfigUtils.isLocalMode(conf);
    List<Executor> execs = new ArrayList<>();
    for (List<Long> e : workerState.getLocalExecutors()) {
        Executor executor = localMode
            ? LocalExecutor.mkExecutor(workerState, e, initCreds)
            : Executor.mkExecutor(workerState, e, initCreds);
        for (int i = 0; i < executor.getTaskIds().size(); ++i) {
            workerState.localReceiveQueues.put(executor.getTaskIds().get(i), executor.getReceiveQueue());
        }
        execs.add(executor);
    }

    // Start every executor and publish the running handles for the other worker callbacks.
    List<IRunningExecutor> newExecutors = new ArrayList<>();
    for (Executor executor : execs) {
        newExecutors.add(executor.execute());
    }
    executorsAtom.set(newExecutors);

    // If there are no remote outbound tasks, don't start the thread.
    if (workerState.hasRemoteOutboundTasks()) {
        transferThread = workerState.makeTransferThread();
        transferThread.setName("Worker-Transfer");
    }
    establishLogSettingCallback();

    final int credCheckMaxAllowed = 10;
    // consecutive-error-count; a one-element array so the lambda below can mutate it
    final int[] credCheckErrCnt = new int[1];
    workerState.refreshCredentialsTimer.scheduleRecurring(0, (Integer) conf.get(Config.TASK_CREDENTIALS_POLL_SECS), () -> {
        try {
            checkCredentialsChanged();
            credCheckErrCnt[0] = 0;
        } catch (Exception ex) {
            credCheckErrCnt[0]++;
            if (credCheckErrCnt[0] <= credCheckMaxAllowed) {
                LOG.warn("Ignoring {} of {} consecutive exceptions when checking for credential change", credCheckErrCnt[0], credCheckMaxAllowed, ex);
            } else {
                LOG.error("Received {} consecutive exceptions, {} tolerated, when checking for credential change", credCheckErrCnt[0], credCheckMaxAllowed, ex);
                throw ex;
            }
        }
    });

    workerState.checkForUpdatedBlobsTimer.scheduleRecurring(0, (Integer) conf.getOrDefault(Config.WORKER_BLOB_UPDATE_POLL_INTERVAL_SECS, 10), () -> {
        try {
            LOG.debug("Checking if blobs have updated");
            updateBlobUpdates();
        } catch (IOException e) {
            // IOException from reading the version files to be ignored.
            // Pass the throwable itself so the message and stack trace are logged;
            // StackTraceElement[].toString() only yields the array's identity string.
            LOG.error("Failed to check for updated blobs", e);
        }
    });

    // The jitter allows the clients to get the data at different times, and avoids thundering herd
    if (!(Boolean) topologyConf.get(Config.TOPOLOGY_DISABLE_LOADAWARE_MESSAGING)) {
        workerState.refreshLoadTimer.scheduleRecurringWithJitter(0, 1, 500, Worker.this::doRefreshLoad);
    }
    workerState.refreshConnectionsTimer.scheduleRecurring(0, (Integer) conf.get(Config.TASK_REFRESH_POLL_SECS), workerState::refreshConnections);
    workerState.resetLogLevelsTimer.scheduleRecurring(0, (Integer) conf.get(Config.WORKER_LOG_LEVEL_RESET_POLL_SECS), logConfigManager::resetLogLevels);
    workerState.refreshActiveTimer.scheduleRecurring(0, (Integer) conf.get(Config.TASK_REFRESH_POLL_SECS), workerState::refreshStormActive);
    setupFlushTupleTimer(topologyConf, newExecutors);
    setupBackPressureCheckTimer(topologyConf);

    LOG.info("Worker has topology config {}", ConfigUtils.maskPasswords(topologyConf));
    LOG.info("Worker {} for storm {} on {}:{} has finished loading", workerId, topologyId, assignmentId, port);
    return this;
}
use of org.apache.storm.executor.IRunningExecutor in project storm by apache.
the class Worker method checkCredentialsChanged.
/**
 * Fetches the topology's credentials from cluster state and, when they differ from the ones
 * held by the worker state, updates the subject, stores the new credentials and notifies
 * every running executor.
 */
public void checkCredentialsChanged() {
    final Credentials latest = workerState.stormClusterState.credentials(topologyId, null);
    if (ObjectUtils.equals(latest, this.workerState.getCredentials())) {
        // Nothing changed since the last poll.
        return;
    }

    // This does not have to be atomic, worst case we update when one is not needed
    ClientAuthUtils.updateSubject(subject, autoCreds, (latest != null) ? latest.get_creds() : null);
    this.workerState.setCredentials(latest);

    // Propagate the new credentials to each executor running in this worker.
    for (IRunningExecutor running : executorsAtom.get()) {
        running.credentialsChanged(latest);
    }
}
use of org.apache.storm.executor.IRunningExecutor in project storm by apache.
the class Worker method mkBackpressureHandler.
/**
 * Makes a handler that checks and updates the worker's backpressure flag.
 *
 * <p>The executor list is read from {@code executorsAtom} once, when the handler is created.
 * On each event the handler recomputes the flag from the transfer queue's throttle state and
 * the executors' backpressure flags, and publishes it to cluster state only when it differs
 * from the previously stored value.
 *
 * @return the backpressure callback
 */
private WorkerBackpressureCallback mkBackpressureHandler() {
    final List<IRunningExecutor> executors = executorsAtom.get();
    return new WorkerBackpressureCallback() {
        @Override
        public void onEvent(Object obj) {
            String topologyId = workerState.topologyId;
            String assignmentId = workerState.assignmentId;
            int port = workerState.port;
            IStormClusterState stormClusterState = workerState.stormClusterState;
            boolean prevBackpressureFlag = workerState.backpressure.get();
            boolean currBackpressureFlag = prevBackpressureFlag;
            if (null != executors) {
                // anyMatch yields false for an empty executor list; reduce(...).get() on an
                // empty stream would throw NoSuchElementException instead.
                currBackpressureFlag = workerState.transferQueue.getThrottleOn()
                    || executors.stream().anyMatch(IRunningExecutor::getBackPressureFlag);
            }
            if (currBackpressureFlag != prevBackpressureFlag) {
                try {
                    LOG.debug("worker backpressure flag changing from {} to {}", prevBackpressureFlag, currBackpressureFlag);
                    stormClusterState.workerBackpressure(topologyId, assignmentId, (long) port, currBackpressureFlag);
                    // doing the local reset after the zk update succeeds is very important to avoid a bad state upon zk exception
                    workerState.backpressure.set(currBackpressureFlag);
                } catch (Exception ex) {
                    LOG.error("workerBackpressure update failed when connecting to ZK ... will retry", ex);
                }
            }
        }
    };
}
use of org.apache.storm.executor.IRunningExecutor in project storm by apache.
the class WorkerState method refreshLoad.
/**
 * Recomputes the local and remote load maps and stores them in {@code loadMapping}.
 *
 * <p>Local load is each executor's receive-queue load, keyed by the first element of its
 * executor id. Remote load is gathered from every cached connection for the outbound tasks
 * that are not local. Local load is additionally sent through {@code receiver} when the
 * current time has passed {@code nextLoadUpdate}, which is then pushed forward by
 * {@code LOAD_REFRESH_INTERVAL_MS}.
 *
 * @param execs the running executors whose receive-queue load is sampled
 */
public void refreshLoad(List<IRunningExecutor> execs) {
    Set<Integer> remoteTasks = Sets.difference(new HashSet<>(outboundTasks), new HashSet<>(localTaskIds));

    Map<Integer, Double> localLoad = new HashMap<>();
    for (IRunningExecutor exec : execs) {
        double receiveLoad = exec.getReceiveQueue().getQueueLoad();
        localLoad.put(exec.getExecutorId().get(0).intValue(), receiveLoad);
    }

    Map<Integer, Load> remoteLoad = new HashMap<>();
    // Iterate the collection directly; no intermediate stream is needed just to call forEach.
    cachedNodeToPortSocket.get().values().forEach(conn -> remoteLoad.putAll(conn.getLoad(remoteTasks)));

    loadMapping.setLocal(localLoad);
    loadMapping.setRemote(remoteLoad);

    // Primitive long avoids boxing the timestamp only to unbox it for the comparison and addition.
    long now = System.currentTimeMillis();
    if (now > nextLoadUpdate.get()) {
        receiver.sendLoadMetrics(localLoad);
        nextLoadUpdate.set(now + LOAD_REFRESH_INTERVAL_MS);
    }
}
use of org.apache.storm.executor.IRunningExecutor in project storm by apache.
the class Worker method setupFlushTupleTimer.
/**
 * Schedules the recurring timer that publishes flush tuples to the local executors.
 *
 * <p>No timer is scheduled when both the producer and transfer batch sizes are 1, or when
 * the configured flush interval is 0.
 *
 * @param topologyConf topology configuration supplying the batch sizes and flush interval
 * @param executors the running executors that receive flush tuples
 */
private void setupFlushTupleTimer(final Map<String, Object> topologyConf, final List<IRunningExecutor> executors) {
    final Integer producerBatch = ObjectReader.getInt(topologyConf.get(Config.TOPOLOGY_PRODUCER_BATCH_SIZE));
    final Integer transferBatch = ObjectReader.getInt(topologyConf.get(Config.TOPOLOGY_TRANSFER_BATCH_SIZE));
    final Long flushMillis = ObjectReader.getLong(topologyConf.get(Config.TOPOLOGY_BATCH_FLUSH_INTERVAL_MILLIS));

    final boolean batchingOff = producerBatch == 1 && transferBatch == 1;
    if (batchingOff || flushMillis == 0) {
        LOG.info("Flush Tuple generation disabled. producerBatchSize={}, xferBatchSize={}, flushIntervalMillis={}", producerBatch, transferBatch, flushMillis);
        return;
    }

    workerState.flushTupleTimer.scheduleRecurringMs(flushMillis, flushMillis, () -> {
        // send flush tuple to all local executors, skipping the one whose first id is the system task id
        for (int idx = 0; idx < executors.size(); idx++) {
            final IRunningExecutor running = executors.get(idx);
            if (running.getExecutorId().get(0) == Constants.SYSTEM_TASK_ID) {
                continue;
            }
            running.publishFlushTuple();
        }
    });
    LOG.info("Flush tuple will be generated every {} millis", flushMillis);
}
Aggregations