use of in project storm by apache.
the class LocalizedResource method localVersionOfBlob.
static long localVersionOfBlob(Path versionFile) {
long currentVersion = -1;
if (Files.exists(versionFile) && !(Files.isDirectory(versionFile))) {
try (BufferedReader br = new BufferedReader(new FileReader(versionFile.toFile()))) {
String line = br.readLine();
currentVersion = Long.parseLong(line);
} catch (IOException e) {
throw new RuntimeException(e);
return currentVersion;
use of in project storm by apache.
the class ServerUtils method isAnyPosixProcessPidDirAlive.
/**
 * Find if the process is alive using the existence of the {@code /proc/<pid>} directory
 * owned by the supplied expectedUser. This is an alternative to the "ps -p pid -u uid" command
 * used in {@link #isAnyPosixProcessAlive(Collection, int)}.
 * <p>
 * Processes are tracked using the existence of the directory {@code /proc/<pid>}.
 * For each of the supplied PIDs, its PID directory is checked for existence and for ownership
 * by the specified expectedUser.
 * </p>
 * @param pids Process IDs that need to be monitored for liveness
 * @param expectedUser the user that is expected to own the process
 * @param mockFileOwnerToUid if true (used for testing), then convert File.owner to UID
 * @return true if any one of the processes is owned by expectedUser and alive, else false
 * @throws IOException on I/O exception
 */
// Returns true as soon as one pid in `pids` has a /proc/<pid> directory whose owner matches
// expectedUser, compared by name or, when the owner is reported numerically, by uid.
// Returns false when no pid qualifies.
// NOTE(review): this copy of the method has lost its closing braces, the bodies of two guards
// and the logger-call prefixes (bare string literals remain). Restore them from the upstream
// source before compiling; the notes below mark each gap.
public static boolean isAnyPosixProcessPidDirAlive(Collection<Long> pids, String expectedUser, boolean mockFileOwnerToUid) throws IOException {
File procDir = new File("/proc");
// Without /proc the liveness check cannot be performed at all, so this is reported as an error.
if (!procDir.exists()) {
// NOTE(review): the `"" + System.getProperty("")` tail looks truncated. System.getProperty
// rejects an empty key with IllegalArgumentException, so a property name presumably belongs here.
throw new IOException("Missing process directory " + procDir.getAbsolutePath() + ": method not supported on " + "" + System.getProperty(""));
for (long pid : pids) {
File pidDir = new File(procDir, String.valueOf(pid));
// NOTE(review): the body of this guard is not visible; presumably it skips to the next pid.
if (!pidDir.exists()) {
// check if existing process is owned by the specified expectedUser, if not, the process is dead
String actualUser;
try {
actualUser = Files.getOwner(pidDir.toPath()).getName();
} catch (NoSuchFileException ex) {
// process died before the expectedUser can be checked
// NOTE(review): no statement is visible in this catch; actualUser would be unassigned below
// unless control leaves the iteration here. Presumably a `continue` was lost.
if (mockFileOwnerToUid) {
// code activated in testing to simulate Files.getOwner returning UID (which sometimes happens in runtime)
// NOTE(review): bare string literal on the next line; the logging call it belonged to is missing.
if (StringUtils.isNumeric(actualUser)) {"Skip mocking, since owner {} of pidDir {} is already numeric", actualUser, pidDir);
} else {
// cachedUserToUidMap memoizes user name -> uid so getUserId is only called on a cache miss.
Integer actualUid = cachedUserToUidMap.get(actualUser);
if (actualUid == null) {
actualUid = ServerUtils.getUserId(actualUser);
// A negative result from getUserId is treated as a failed lookup.
if (actualUid < 0) {
String err = String.format("Cannot get UID for %s, while mocking the owner of pidDir %s", actualUser, pidDir.getAbsolutePath());
throw new IOException(err);
// NOTE(review): the two bare string literals below also lost their logging-call prefix.
cachedUserToUidMap.put(actualUser, actualUid);"Found UID {} for {}, while mocking the owner of pidDir {}", actualUid, actualUser, pidDir);
} else {"Found cached UID {} for {}, while mocking the owner of pidDir {}", actualUid, actualUser, pidDir);
// Replace the owner name with its uid so the numeric comparison path below is taken.
actualUser = String.valueOf(actualUid);
// sometimes uid is returned instead of username - if so, try to convert and compare with uid
if (StringUtils.isNumeric(actualUser)) {
// numeric actualUser - this is UID not user
LOG.debug("Process directory {} owner is uid={}", pidDir, actualUser);
int actualUid = Integer.parseInt(actualUser);
// Resolve expectedUser to a uid, using the same cache, so the comparison is uid against uid.
Integer expectedUid = cachedUserToUidMap.get(expectedUser);
if (expectedUid == null) {
expectedUid = ServerUtils.getUserId(expectedUser);
if (expectedUid < 0) {
String err = String.format("Cannot get uid for %s to compare with owner id=%d of process directory %s", expectedUser, actualUid, pidDir.getAbsolutePath());
throw new IOException(err);
cachedUserToUidMap.put(expectedUser, expectedUid);
// actualUid is a primitive int, so expectedUid is unboxed and this compares values, not references.
if (expectedUid == actualUid) {
LOG.debug("Process {} is alive and owned by expectedUser {}/{}", pid, expectedUser, expectedUid);
return true;
// NOTE(review): bare string literal; the logging call for the "pid was reused" case is missing.
}"Prior process is dead, since directory {} owner {} is not same as expectedUser {}/{}, " + "likely pid {} was reused for a new process for uid {}, {}", pidDir, actualUser, expectedUser, expectedUid, pid, actualUid, getProcessDesc(pidDir));
} else {
// actualUser is a string
LOG.debug("Process directory {} owner is {}", pidDir, actualUser);
if (expectedUser.equals(actualUser)) {
LOG.debug("Process {} is alive and owned by expectedUser {}", pid, expectedUser);
return true;
// NOTE(review): bare string literal (logging call missing); the format string also ends in "{}}",
// where the trailing '}' looks like a typo.
}"Prior process is dead, since directory {} owner {} is not same as expectedUser {}, " + "likely pid {} was reused for a new process for actualUser {}, {}}", pidDir, actualUser, expectedUser, pid, actualUser, getProcessDesc(pidDir));
// NOTE(review): bare string literal; the logging call reporting the overall "none alive" outcome is missing.
}"None of the processes {} are alive AND owned by expectedUser {}", pids, expectedUser);
return false;
use of in project storm by apache.
the class Utils method isValidConf.
static boolean isValidConf(Map<String, Object> orig, Map<String, Object> deser) {
MapDifference<String, Object> diff = Maps.difference(orig, deser);
if (diff.areEqual()) {
return true;
for (Map.Entry<String, Object> entryOnLeft : diff.entriesOnlyOnLeft().entrySet()) {
LOG.warn("Config property ({}) is found in original config, but missing from the " + "serialized-deserialized config. This is due to an internal error in " + "serialization. Name: {} - Value: {}", entryOnLeft.getKey(), entryOnLeft.getKey(), entryOnLeft.getValue());
for (Map.Entry<String, Object> entryOnRight : diff.entriesOnlyOnRight().entrySet()) {
LOG.warn("Config property ({}) is not found in original config, but present in " + "serialized-deserialized config. This is due to an internal error in " + "serialization. Name: {} - Value: {}", entryOnRight.getKey(), entryOnRight.getKey(), entryOnRight.getValue());
for (Map.Entry<String, MapDifference.ValueDifference<Object>> entryDiffers : diff.entriesDiffering().entrySet()) {
Object leftValue = entryDiffers.getValue().leftValue();
Object rightValue = entryDiffers.getValue().rightValue();
LOG.warn("Config value differs after json serialization. Name: {} - Original Value: {} - DeSer. Value: {}", entryDiffers.getKey(), leftValue, rightValue);
return false;
use of in project storm by apache.
the class AsyncLocalizer method cleanup.
// One cleanup pass over the localized-resource cache. The visible code builds a
// LocalizedResourceRetentionSet sized by cacheTargetSize, runs its cleanup against a
// ClientBlobStore, prunes topologyBasicDownloaded and blobPending down to safeTopologyIds,
// walks the topology dist dirs, and removes the cache of users with no files and no archives.
// NOTE(review): this copy has lost its closing braces, several loop/branch bodies and the
// logger-call prefixes (bare string literals remain). The notes below mark each gap; restore
// from the upstream source before compiling.
void cleanup() {
// NOTE(review): bare string literal; the logging call for "Starting cleanup" is missing.
try {"Starting cleanup");
LocalizedResourceRetentionSet toClean = new LocalizedResourceRetentionSet(cacheTargetSize);
// need one large set of all and then clean via LRU
// NOTE(review): both loops below only log "after adding ..."; the statements that add
// t.getValue() to toClean are not visible here.
for (Map.Entry<String, ConcurrentHashMap<String, LocalizedResource>> t : userArchives.entrySet()) {
LOG.debug("Resources to be cleaned after adding {} archives : {}", t.getKey(), toClean);
for (Map.Entry<String, ConcurrentHashMap<String, LocalizedResource>> t : userFiles.entrySet()) {
LOG.debug("Resources to be cleaned after adding {} files : {}", t.getKey(), toClean);
Set<String> topologiesWithDeletes = new HashSet<>();
// The blob store client is opened with try-with-resources, so it is closed once cleanup finishes.
try (ClientBlobStore store = getClientBlobStore()) {
Set<LocallyCachedBlob> deletedBlobs = toClean.cleanup(store);
for (LocallyCachedBlob deletedBlob : deletedBlobs) {
// A null topologyId is skipped; presumably the blob key did not belong to a topology (confirm).
String topologyId = ConfigUtils.getIdFromBlobKey(deletedBlob.getKey());
// NOTE(review): body missing; presumably records topologyId in topologiesWithDeletes.
if (topologyId != null) {
HashSet<String> safeTopologyIds = new HashSet<>();
// NOTE(review): loop body missing; nothing visible populates safeTopologyIds from blobKey.
for (String blobKey : topologyBlobs.keySet()) {
LOG.debug("Topologies {} can no longer be considered fully downloaded", topologiesWithDeletes);
// Deleting this early does not hurt anything
// Drop every tracked topology id that is not in safeTopologyIds.
topologyBasicDownloaded.keySet().removeIf(topoId -> !safeTopologyIds.contains(topoId));
blobPending.keySet().removeIf(topoId -> !safeTopologyIds.contains(topoId));
try {
forEachTopologyDistDir((p, topologyId) -> {
String topoJarKey = ConfigUtils.masterStormJarKey(topologyId);
String topoCodeKey = ConfigUtils.masterStormCodeKey(topologyId);
String topoConfKey = ConfigUtils.masterStormConfKey(topologyId);
// True only when none of the topology's jar/code/conf keys is tracked in topologyBlobs.
// NOTE(review): the action taken in that case is missing from this copy.
if (!topologyBlobs.containsKey(topoJarKey) && !topologyBlobs.containsKey(topoCodeKey) && !topologyBlobs.containsKey(topoConfKey)) {
} catch (Exception e) {
// Failure to enumerate the dist dirs is logged and the rest of the cleanup continues.
LOG.error("Could not read topology directories for cleanup", e);
LOG.debug("Resource cleanup: {}", toClean);
// NOTE(review): allUsers is seeded from userArchives only in the visible code, so a user with
// files but no archives entry would never be visited. Confirm whether a line adding
// userFiles.keySet() was lost.
Set<String> allUsers = new HashSet<>(userArchives.keySet());
for (String user : allUsers) {
ConcurrentMap<String, LocalizedResource> filesForUser = userFiles.get(user);
ConcurrentMap<String, LocalizedResource> archivesForUser = userArchives.get(user);
// A user is removable only when both maps are absent or empty.
if ((filesForUser == null || filesForUser.size() == 0) && (archivesForUser == null || archivesForUser.size() == 0)) {
LOG.debug("removing empty set: {}", user);
try {
LocalizedResource.completelyRemoveUnusedUser(localBaseDir, user);
} catch (IOException e) {
// Best effort: a failed delete is logged and the loop moves on to the next user.
LOG.error("Error trying to delete cached user files", e);
} catch (Exception ex) {
// Ordinary exceptions are logged and swallowed.
LOG.error("AsyncLocalizer cleanup failure", ex);
} catch (Error error) {
// Errors are logged and then terminate the process with exit code 20.
LOG.error("AsyncLocalizer cleanup failure", error);
Utils.exitProcess(20, "AsyncLocalizer cleanup failure");
// NOTE(review): bare string literal; the logging call for "Finish cleanup" is missing.
} finally {"Finish cleanup");
use of in project storm by apache.
the class Nimbus method launchServer.
// Starts Nimbus: registers this host in cluster state via addNimbusHost, schedules the
// recurring timer tasks, and registers the resource and scheduling-time gauges. If
// initialization throws, the exception is rethrown when it was caused by an
// InterruptedException or InterruptedIOException; otherwise it is logged and the process
// exits with code 13.
// NOTE(review): this copy has lost its closing braces, the bodies of most timer lambdas and
// one logger-call prefix. The notes below mark each gap; restore from the upstream source
// before compiling.
public void launchServer() throws Exception {
try {
IStormClusterState state = stormClusterState;
// NOTE(review): bare string literal after the assignment; the logging call is missing. The conf
// is passed through ConfigUtils.maskPasswords before being handed to it.
NimbusInfo hpi = nimbusHostPortInfo;"Starting Nimbus with conf {}", ConfigUtils.maskPasswords(conf));
// add to nimbuses
state.addNimbusHost(hpi.getHost(), new NimbusSummary(hpi.getHost(), hpi.getPort(), Time.currentTimeSecs(), false, STORM_VERSION));
// NOTE(review): loop body missing; what is done with each consumer executor is not visible.
for (ClusterMetricsConsumerExecutor exec : clusterConsumerExceutors) {
// Leadership coordination may be incomplete when launchServer is called. Previous behavior did a one time check
// which could cause Nimbus to not process TopologyActions.GAIN_LEADERSHIP transitions. Similar problem exists for
// HA Nimbus on being newly elected as leader. Change to a recurring pattern addresses these problems.
timer.scheduleRecurring(3, 5, () -> {
try {
boolean isLeader = isLeader();
// Only on the non-leader -> leader edge: apply GAIN_LEADERSHIP to every active topology.
if (isLeader && !wasLeader) {
for (String topoId : state.activeStorms()) {
transition(topoId, TopologyActions.GAIN_LEADERSHIP, null);
// wasLeader is assigned inside the lambda, so it cannot be a captured local; it remembers
// the previous poll's result.
wasLeader = isLeader;
} catch (Exception e) {
throw new RuntimeException(e);
// Defaults to false when NIMBUS_DO_NOT_REASSIGN is absent from conf.
final boolean doNotReassign = (Boolean) conf.getOrDefault(ServerConfigUtils.NIMBUS_DO_NOT_REASSIGN, false);
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_MONITOR_FREQ_SECS)), () -> {
try {
// NOTE(review): body missing; the work gated on !doNotReassign is not visible.
if (!doNotReassign) {
} catch (Exception e) {
throw new RuntimeException(e);
// Schedule Nimbus inbox cleaner
final int jarExpSecs = ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_INBOX_JAR_EXPIRATION_SECS));
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_CLEANUP_INBOX_FREQ_SECS)), () -> {
try {
cleanInbox(getInbox(), jarExpSecs);
} catch (Exception e) {
throw new RuntimeException(e);
// Schedule topology history cleaner
// The history cleaner is only scheduled when an interval is obtained (non-null).
Integer interval = ObjectReader.getInt(conf.get(DaemonConfig.LOGVIEWER_CLEANUP_INTERVAL_SECS), null);
if (interval != null) {
// NOTE(review): lvCleanupAgeMins is read here, but the lambda body that would use it is missing.
final int lvCleanupAgeMins = ObjectReader.getInt(conf.get(DaemonConfig.LOGVIEWER_CLEANUP_AGE_MINS));
timer.scheduleRecurring(0, interval, () -> {
try {
} catch (Exception e) {
throw new RuntimeException(e);
// NOTE(review): task body missing; the period is NIMBUS_CREDENTIAL_RENEW_FREQ_SECS.
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_CREDENTIAL_RENEW_FREQ_SECS)), () -> {
try {
} catch (Exception e) {
throw new RuntimeException(e);
// Periodically make sure the blobstore update time is up to date. This could have failed if Nimbus encountered
// an exception updating the update time, or due to bugs causing a missed update of the blobstore mod time on a blob
// update.
// First argument 30, second argument five times the localizer blob-update interval.
// NOTE(review): task body missing.
timer.scheduleRecurring(30, ServerConfigUtils.getLocalizerUpdateBlobInterval(conf) * 5, () -> {
try {
} catch (IOException e) {
throw new RuntimeException(e);
// Cluster-wide resource gauges: each sums a per-supervisor value over nodeIdToResources.
// The two "non-negative" gauges clamp each supervisor's available amount at 0 before summing.
metricsRegistry.registerGauge("nimbus:total-available-memory-non-negative", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(supervisorResources -> Math.max(supervisorResources.getAvailableMem(), 0)).sum());
metricsRegistry.registerGauge("nimbus:available-cpu-non-negative", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(supervisorResources -> Math.max(supervisorResources.getAvailableCpu(), 0)).sum());
metricsRegistry.registerGauge("nimbus:total-memory", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(SupervisorResources::getTotalMem).sum());
metricsRegistry.registerGauge("nimbus:total-cpu", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(SupervisorResources::getTotalCpu).sum());
metricsRegistry.registerGauge("nimbus:longest-scheduling-time-ms", () -> {
// We want to update longest scheduling time in real time in case scheduler get stuck
// Get current time before startTime to avoid potential race with scheduler's Timer
Long currTime = Time.nanoTime();
Long startTime = schedulingStartTimeNs.get();
// With no start time recorded, report the stored longest time; otherwise report the larger of
// the elapsed time and the stored longest. Values are nanoseconds, converted to milliseconds.
return TimeUnit.NANOSECONDS.toMillis(startTime == null ? longestSchedulingTime.get() : Math.max(currTime - startTime, longestSchedulingTime.get()));
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.STORM_CLUSTER_METRICS_CONSUMER_PUBLISH_INTERVAL_SECS)), () -> {
try {
// NOTE(review): body missing; the work performed only on the leader is not visible.
if (isLeader()) {
} catch (Exception e) {
throw new RuntimeException(e);
timer.scheduleRecurring(5, 5, clusterMetricSet);
} catch (Exception e) {
// Interruption-caused failures are rethrown to the caller rather than treated as fatal.
if (Utils.exceptionCauseIsInstanceOf(InterruptedException.class, e)) {
throw e;
if (Utils.exceptionCauseIsInstanceOf(InterruptedIOException.class, e)) {
throw e;
// Any other initialization failure is logged and terminates the process with exit code 13.
LOG.error("Error on initialization of nimbus", e);
Utils.exitProcess(13, "Error on initialization of nimbus");