use of org.apache.sling.api.resource.PersistenceException in project sling by apache.
the class OakClusterViewService method readOrDefineClusterId.
/**
 * Reads the clusterId stored at the discovery resource (typically
 * /var/discovery/oak) or, if none is stored yet, defines a new one and
 * persists it there.
 * <p>
 * oak's discovery-lite can opt to not provide a clusterViewId eg in the
 * single-VM case. (for clusters discovery-lite normally defines the
 * clusterViewId, as it is the one responsible for defining the membership
 * too) Thus if we're not getting an id here we have to define one here. (we
 * can typically assume that this corresponds to a singleVM case, but that's
 * not a 100% requirement). This id must be stored to ensure the contract
 * that the clusterId is stable across restarts. To account for odd/edgy
 * cases the store is retried a limited number of times in case of
 * conflicts, even though they should not occur in singleVM cases.
 * <p>
 * After a failed commit the pending changes are reverted and the resolver
 * is refreshed, so that the next attempt re-reads the repository state
 * instead of seeing (and returning) its own uncommitted clusterId.
 *
 * @param resourceResolver the ResourceResolver with which to read or write
 * the clusterId properties under /var/discovery/oak
 * @return the clusterId to be used - either the one read from the
 * repository or the newly defined one after it was committed successfully
 * @throws PersistenceException when /var/discovery/oak could not be
 * accessed or auto-created
 * @throws RuntimeException when the discovery resource is unavailable,
 * cannot be adapted to a ModifiableValueMap, or the new clusterId could
 * not be committed within the maximum number of attempts
 */
private String readOrDefineClusterId(ResourceResolver resourceResolver) throws PersistenceException {
    //TODO: if Config gets a specific, public getDiscoveryResourcePath, this can be simplified:
    final String clusterInstancesPath = config.getClusterInstancesPath();
    // parent of the clusterInstances path: cut at the last '/' found before
    // the final character, so a trailing slash does not count as separator
    final String discoveryResourcePath = clusterInstancesPath.substring(0,
            clusterInstancesPath.lastIndexOf("/", clusterInstancesPath.length() - 2));
    final int MAX_RETRIES = 5;
    for (int retryCnt = 0; retryCnt < MAX_RETRIES; retryCnt++) {
        Resource varDiscoveryOak = resourceResolver.getResource(discoveryResourcePath);
        if (varDiscoveryOak == null) {
            varDiscoveryOak = ResourceHelper.getOrCreateResource(resourceResolver, discoveryResourcePath);
        }
        if (varDiscoveryOak == null) {
            logger.error("readOrDefineClusterId: Could not create: " + discoveryResourcePath);
            throw new RuntimeException("could not create " + discoveryResourcePath);
        }
        final ModifiableValueMap props = varDiscoveryOak.adaptTo(ModifiableValueMap.class);
        if (props == null) {
            logger.error("readOrDefineClusterId: Could not adaptTo ModifiableValueMap: " + varDiscoveryOak);
            throw new RuntimeException("could not adaptTo ModifiableValueMap: " + varDiscoveryOak);
        }
        final Object clusterIdObj = props.get(PROPERTY_CLUSTER_ID);
        final String clusterId = (clusterIdObj == null) ? null : String.valueOf(clusterIdObj);
        if (clusterId != null && clusterId.length() > 0) {
            logger.trace("readOrDefineClusterId: read clusterId from repo as {}", clusterId);
            return clusterId;
        }
        // must now define a new clusterId and store it under /var/discovery/oak
        final String newClusterId = UUID.randomUUID().toString();
        props.put(PROPERTY_CLUSTER_ID, newClusterId);
        props.put(PROPERTY_CLUSTER_ID_DEFINED_BY, getSlingId());
        props.put(PROPERTY_CLUSTER_ID_DEFINED_AT, Calendar.getInstance());
        try {
            logger.info("readOrDefineClusterId: storing new clusterId as " + newClusterId);
            resourceResolver.commit();
            return newClusterId;
        } catch (PersistenceException e) {
            final int retriesLeft = MAX_RETRIES - retryCnt - 1;
            logger.warn("readOrDefineClusterId: could not persist clusterId " + "(retrying in 1 sec max " + retriesLeft + " more times: " + e, e);
            // discard the uncommitted properties and pick up the latest
            // repository state; otherwise the next iteration would read back
            // the clusterId that was never persisted and return it
            resourceResolver.revert();
            resourceResolver.refresh();
            if (retriesLeft > 0) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e1) {
                    logger.warn("readOrDefineClusterId: got interrupted: " + e1, e1);
                    // keep the interrupt visible to callers; remaining
                    // attempts are still made, just without waiting
                    Thread.currentThread().interrupt();
                }
                logger.info("readOrDefineClusterId: retrying now.");
            }
        }
    }
    throw new RuntimeException("failed to write new clusterId (see log file earlier for more details)");
}
use of org.apache.sling.api.resource.PersistenceException in project sling by apache.
the class OakViewChecker method resetLeaderElectionId.
/**
 * Calculates a new leaderElectionId and stores it - together with the
 * runtimeId, the sling home path and the endpoints of this instance - in
 * the local cluster node resource, then commits the change.
 *
 * @return true if the new leaderElectionId was written and committed -
 * false if no resource resolver could be obtained, the local cluster node
 * resource could not be adapted to a ModifiableValueMap, or the login or
 * the commit failed (the failure is logged in each of these cases)
 */
public boolean resetLeaderElectionId() {
    ResourceResolver resourceResolver = null;
    try {
        final String myClusterNodePath = getLocalClusterNodePath();
        resourceResolver = getResourceResolver();
        if (resourceResolver == null) {
            logger.warn("resetLeaderElectionId: could not login, new leaderElectionId will be calculated upon next heartbeat only!");
            return false;
        }
        final String newLeaderElectionId = newLeaderElectionId();
        final Resource resource = ResourceHelper.getOrCreateResource(resourceResolver, myClusterNodePath);
        final ModifiableValueMap resourceMap = resource.adaptTo(ModifiableValueMap.class);
        if (resourceMap == null) {
            // adaptTo may return null; fail explicitly rather than with a NullPointerException
            logger.error("resetLeaderElectionId: could not adaptTo ModifiableValueMap: " + resource);
            return false;
        }
        resourceMap.put(PROPERTY_ID_RUNTIME, runtimeId);
        // SLING-4765 : store more infos to be able to be more verbose on duplicate slingId/ghost detection
        String slingHomePath = "n/a";
        if (slingSettingsService != null && slingSettingsService.getSlingHomePath() != null) {
            slingHomePath = slingSettingsService.getSlingHomePath();
        }
        resourceMap.put(PROPERTY_ID_SLING_HOME_PATH, slingHomePath);
        final String endpointsAsString = getEndpointsAsString();
        resourceMap.put(PROPERTY_ID_ENDPOINTS, endpointsAsString);
        final Calendar leaderElectionCreatedAt = Calendar.getInstance();
        resourceMap.put("leaderElectionId", newLeaderElectionId);
        resourceMap.put("leaderElectionIdCreatedAt", leaderElectionCreatedAt);
        logger.info("resetLeaderElectionId: storing my runtimeId: {}, endpoints: {}, sling home path: {}, new leaderElectionId: {}, created at: {}", new Object[] { runtimeId, endpointsAsString, slingHomePath, newLeaderElectionId, leaderElectionCreatedAt });
        resourceResolver.commit();
        // only report success once the commit went through
        return true;
    } catch (LoginException e) {
        logger.error("resetLeaderElectionid: could not login: " + e, e);
        return false;
    } catch (PersistenceException e) {
        logger.error("resetLeaderElectionid: got PersistenceException: " + e, e);
        return false;
    } finally {
        if (resourceResolver != null) {
            resourceResolver.close();
        }
    }
}
use of org.apache.sling.api.resource.PersistenceException in project sling by apache.
the class HeartbeatHandler method issueClusterLocalHeartbeat.
/**
 * Issue a cluster local heartbeat (into the repository).
 * <p>
 * Gets or creates the resource at the local cluster node path and, before
 * writing, runs two duplicate-sling.id checks against the values stored
 * there (SLING-2892: the stored 'lastHeartbeat' differs from the one this
 * instance wrote last; SLING-2901: the stored runtimeId differs from this
 * instance's runtimeId). If the runtimeId check fails, the method disables
 * this instance's discovery and returns without writing anything.
 * Otherwise it writes the current time as last heartbeat, on the first
 * heartbeat also the runtimeId, sling home path and endpoints, and - if
 * requested or missing - a new leaderElectionId, then commits.
 * <p>
 * LoginException and PersistenceException are logged, not propagated. The
 * resource resolver is always closed before returning.
 **/
protected void issueClusterLocalHeartbeat() {
if (logger.isDebugEnabled()) {
logger.debug("issueClusterLocalHeartbeat: storing cluster-local heartbeat to repository for " + slingId);
}
ResourceResolver resourceResolver = null;
final String myClusterNodePath = getLocalClusterNodePath();
// the timestamp that will be written as this heartbeat's value
final Calendar currentTime = Calendar.getInstance();
try {
resourceResolver = getResourceResolver();
if (resourceResolver == null) {
logger.error("issueClusterLocalHeartbeat: no resourceresolver available!");
return;
}
final Resource resource = ResourceHelper.getOrCreateResource(resourceResolver, myClusterNodePath);
// NOTE(review): the adaptTo result is used below without a null check - confirm it cannot be null here
final ModifiableValueMap resourceMap = resource.adaptTo(ModifiableValueMap.class);
// the checks below only apply once a heartbeat was successfully written before
if (firstHeartbeatWritten != -1 && lastHeartbeatWritten != null) {
// SLING-2892: additional paranoia check
// after the first heartbeat, check if there's someone else using
// the same sling.id in this cluster
final long timeSinceFirstHeartbeat = System.currentTimeMillis() - firstHeartbeatWritten;
if (timeSinceFirstHeartbeat > 2 * config.getHeartbeatInterval()) {
// but wait at least 2 heartbeat intervals to handle the situation
// where a bundle is refreshed, and startup cases.
final Calendar lastHeartbeat = resourceMap.get(PROPERTY_ID_LAST_HEARTBEAT, Calendar.class);
if (lastHeartbeat != null) {
// compare the stored value with the one this instance wrote
// the last time
if (!lastHeartbeatWritten.getTime().equals(lastHeartbeat.getTime())) {
// then we've likely hit the situation where there is another
// sling instance accessing the same repository (ie in the same cluster)
// using the same sling.id - hence writing to the same
// resource
invalidateCurrentEstablishedView();
discoveryServiceImpl.handleTopologyChanging();
logger.error("issueClusterLocalHeartbeat: SLING-2892: Detected unexpected, concurrent update of: " + myClusterNodePath + " 'lastHeartbeat'. If not done manually, " + "this likely indicates that there is more than 1 instance running in this cluster" + " with the same sling.id. My sling.id is " + slingId + "." + " Check for sling.id.file in your installation of all instances in this cluster " + "to verify this! Duplicate sling.ids are not allowed within a cluster!");
}
}
}
// SLING-2901 : robust paranoia check: on first heartbeat write, the
// 'runtimeId' is set as a property (ignoring any former value).
// If in subsequent calls the value of 'runtimeId' changes, then
// there is someone else around with the same slingId.
final String readRuntimeId = resourceMap.get(PROPERTY_ID_RUNTIME, String.class);
if (readRuntimeId == null) {
// SLING-3977
// someone deleted the resource property
// resetting to -1 makes the first-heartbeat branch below write the runtimeId again
firstHeartbeatWritten = -1;
} else if (!runtimeId.equals(readRuntimeId)) {
invalidateCurrentEstablishedView();
discoveryServiceImpl.handleTopologyChanging();
// collect this instance's details and the stored ones for the error message
final String slingHomePath = slingSettingsService == null ? "n/a" : slingSettingsService.getSlingHomePath();
final String endpointsAsString = getEndpointsAsString();
final String readEndpoints = resourceMap.get(PROPERTY_ID_ENDPOINTS, String.class);
final String readSlingHomePath = resourceMap.get(PROPERTY_ID_SLING_HOME_PATH, String.class);
logger.error("issueClusterLocalHeartbeat: SLING-2901: Detected more than 1 instance running in this cluster " + " with the same sling.id. " + "My sling.id: " + slingId + ", my runtimeId: " + runtimeId + ", my endpoints: " + endpointsAsString + ", my slingHomePath: " + slingHomePath + ", other runtimeId: " + readRuntimeId + ", other endpoints: " + readEndpoints + ", other slingHomePath:" + readSlingHomePath + " Check for sling.id.file in your installation of all instances in this cluster " + "to verify this! Duplicate sling.ids are not allowed within a cluster!");
logger.error("issueClusterLocalHeartbeat: sending TOPOLOGY_CHANGING before self-disabling.");
discoveryServiceImpl.forcedShutdown();
logger.error("issueClusterLocalHeartbeat: disabling discovery.impl");
activated = false;
if (context != null) {
// disable all components
try {
context.getBundleContext().getBundle().stop();
} catch (BundleException e) {
logger.warn("issueClusterLocalHeartbeat: could not stop bundle: " + e, e);
// then disable all components instead
context.disableComponent(null);
}
}
// no heartbeat is written by the instance that detected the duplicate
return;
}
}
resourceMap.put(PROPERTY_ID_LAST_HEARTBEAT, currentTime);
// first heartbeat (or runtimeId property was found missing above): store the identifying properties
if (firstHeartbeatWritten == -1) {
resourceMap.put(PROPERTY_ID_RUNTIME, runtimeId);
// SLING-4765 : store more infos to be able to be more verbose on duplicate slingId/ghost detection
// NOTE(review): getSlingHomePath() is not null-checked before being stored - confirm the value map accepts null
final String slingHomePath = slingSettingsService == null ? "n/a" : slingSettingsService.getSlingHomePath();
resourceMap.put(PROPERTY_ID_SLING_HOME_PATH, slingHomePath);
final String endpointsAsString = getEndpointsAsString();
resourceMap.put(PROPERTY_ID_ENDPOINTS, endpointsAsString);
logger.info("issueClusterLocalHeartbeat: storing my runtimeId: {}, endpoints: {} and sling home path: {}", new Object[] { runtimeId, endpointsAsString, slingHomePath });
}
// (re)define the leaderElectionId when a reset was requested or none is stored yet
if (resetLeaderElectionId || !resourceMap.containsKey("leaderElectionId")) {
// the new leaderElectionId might have been 'pre set' in the field 'newLeaderElectionId'
// if that's the case, use that one, otherwise calculate a new one now
final String newLeaderElectionId = this.newLeaderElectionId != null ? this.newLeaderElectionId : newLeaderElectionId(resourceResolver);
// the pre-set value is consumed here
this.newLeaderElectionId = null;
resourceMap.put("leaderElectionId", newLeaderElectionId);
resourceMap.put("leaderElectionIdCreatedAt", new Date());
logger.info("issueClusterLocalHeartbeat: set leaderElectionId to " + newLeaderElectionId + " (resetLeaderElectionId: " + resetLeaderElectionId + ")");
if (votingHandler != null) {
votingHandler.setLeaderElectionId(newLeaderElectionId);
}
// NOTE(review): the flag is cleared before the commit below - if the commit fails the reset request is not retried; confirm this is intended
resetLeaderElectionId = false;
}
logger.debug("issueClusterLocalHeartbeat: committing cluster-local heartbeat to repository for {}", slingId);
resourceResolver.commit();
logger.debug("issueClusterLocalHeartbeat: committed cluster-local heartbeat to repository for {}", slingId);
// SLING-2892: only in success case: remember the last heartbeat value written
lastHeartbeatWritten = currentTime;
// and set the first heartbeat written value - if it is not already set
if (firstHeartbeatWritten == -1) {
firstHeartbeatWritten = System.currentTimeMillis();
}
} catch (LoginException e) {
logger.error("issueHeartbeat: could not log in administratively: " + e, e);
} catch (PersistenceException e) {
logger.error("issueHeartbeat: Got a PersistenceException: " + myClusterNodePath + " " + e, e);
} finally {
// always release the resolver, also on the early-return paths above
if (resourceResolver != null) {
resourceResolver.close();
}
}
}
use of org.apache.sling.api.resource.PersistenceException in project sling by apache.
the class HeartbeatHandler method startNewVoting.
/**
 * Management function to trigger the otherwise algorithm-dependent
 * start of a new voting.
 * This can make sense when explicitly trying to force a leader
 * change (which is otherwise not allowed by the discovery API)
 * <p>
 * Determines the live instances below the cluster instances path and
 * starts a new voting for them. If no resource resolver is available the
 * voting is not started and an error is logged. LoginException and
 * PersistenceException are logged, not propagated.
 */
public void startNewVoting() {
    logger.info("startNewVoting: explicitly starting new voting...");
    ResourceResolver resourceResolver = null;
    try {
        resourceResolver = getResourceResolver();
        if (resourceResolver == null) {
            // same guard as in issueClusterLocalHeartbeat: without a resolver
            // nothing can be read from or written to the repository
            logger.error("startNewVoting: no resourceresolver available!");
            return;
        }
        final Resource clusterNodesRes = ResourceHelper.getOrCreateResource(resourceResolver, config.getClusterInstancesPath());
        final Set<String> liveInstances = ViewHelper.determineLiveInstances(clusterNodesRes, config);
        doStartNewVoting(resourceResolver, liveInstances);
        logger.info("startNewVoting: explicit new voting was started.");
    } catch (LoginException e) {
        logger.error("startNewVoting: could not log in administratively: " + e, e);
    } catch (PersistenceException e) {
        logger.error("startNewVoting: encountered a persistence exception during view check: " + e, e);
    } finally {
        if (resourceResolver != null) {
            resourceResolver.close();
        }
    }
}
use of org.apache.sling.api.resource.PersistenceException in project sling by apache.
the class JobHandler method reassign.
/**
 * Reassign to a new instance.
 * <p>
 * Asks the topology capabilities (if available) for the target instance of
 * this job's topic, copies the job's properties to a resource at a new
 * unique path for that target and deletes the current job resource. The
 * queue name and started time are dropped from the copy; the target
 * instance property is set to the detected target, or removed if there is
 * none. Nothing happens if the job resource no longer exists. Failures are
 * logged only, and the resource resolver is always closed.
 */
public void reassign() {
    final QueueInfo info = this.configuration.getQueueConfigurationManager().getQueueInfo(job.getTopic());
    // Sanity check if queue configuration has changed
    final TopologyCapabilities capabilities = this.configuration.getTopologyCapabilities();
    final String targetId;
    if (capabilities != null) {
        targetId = capabilities.detectTarget(job.getTopic(), job.getProperties(), info);
    } else {
        targetId = null;
    }

    final ResourceResolver resolver = this.configuration.createResourceResolver();
    try {
        final Resource jobResource = resolver.getResource(job.getResourcePath());
        if (jobResource == null) {
            // job resource is gone - nothing to move
            return;
        }

        final ValueMap currentValues = ResourceHelper.getValueMap(jobResource);
        final String targetPath = this.configuration.getUniquePath(targetId, job.getTopic(), job.getId(), job.getProperties());

        // build the property set for the job at its new location
        final Map<String, Object> reassigned = new HashMap<>(currentValues);
        reassigned.remove(Job.PROPERTY_JOB_QUEUE_NAME);
        if (targetId != null) {
            reassigned.put(Job.PROPERTY_JOB_TARGET_INSTANCE, targetId);
        } else {
            reassigned.remove(Job.PROPERTY_JOB_TARGET_INSTANCE);
        }
        reassigned.remove(Job.PROPERTY_JOB_STARTED_TIME);

        // create the copy, drop the old resource and persist both in one commit
        ResourceHelper.getOrCreateResource(resolver, targetPath, reassigned);
        resolver.delete(jobResource);
        resolver.commit();
    } catch (final PersistenceException pe) {
        this.configuration.getMainLogger().warn("Unable to reassign job " + job.getId(), pe);
    } catch (final InstantiationException ie) {
        // something happened with the resource in the meantime
        this.configuration.getMainLogger().debug("Unable to instantiate job", ie);
    } finally {
        resolver.close();
    }
}
Aggregations