Use of org.apache.geode.SystemConnectException in the Apache Geode project.
Class InternalDistributedSystem, method initialize.
/**
* Initializes this connection to a distributed system with the current configuration state.
*
* <p>Steps, in the order the code performs them: decide whether this member is a loner, build
* the runtime configuration, configure logging and create the log writers, initialize the
* handshake keys, create off-heap storage, optionally lock memory, call startInitLocator(),
* create the distribution manager (a LonerDistributionManager for a loner), read the channel id,
* call endInitLocator(), start the stat sampler unless statistics are disabled, and finally
* reset the reconnect flags.
*
* <p>Any RuntimeException thrown after the runtime configuration has been created causes that
* configuration to be closed before the exception is rethrown.
*
* @throws GemFireConfigException if the locators attribute is empty while mcast-port is non-zero
* @throws GemFireSecurityException if initializing the handshake certificates or keys fails
* @throws IllegalStateException if lock-memory is requested, available memory is smaller than
*         the estimated need, and ALLOW_MEMORY_LOCK_WHEN_OVERCOMMITTED is false
* @throws SystemConnectException if startInitLocator() is interrupted or the distribution
*         manager reports a disconnect while the channel id is being read
* @throws GemFireIOException if endInitLocator() fails with an IOException
*/
private void initialize() {
// No locators configured: either a configuration error (multicast requested) or a loner.
if (this.originalConfig.getLocators().equals("")) {
if (this.originalConfig.getMcastPort() != 0) {
throw new GemFireConfigException("The " + LOCATORS + " attribute can not be empty when the " + MCAST_PORT + " attribute is non-zero.");
} else {
// no distribution
this.isLoner = true;
}
}
this.config = new RuntimeDistributionConfigImpl(this);
// For non-loners, a non-zero reconnect attempt counter marks this as a reconnect attempt.
if (!this.isLoner) {
this.attemptingToReconnect = (reconnectAttemptCounter > 0);
}
// Everything below runs under a catch that closes this.config on any RuntimeException.
try {
SocketCreatorFactory.setDistributionConfig(config);
AlertAppender.getInstance().onConnect(this);
// LOG: create LogWriterAppender(s) if log-file or security-log-file is specified
// A log file counts as "specified" when it is non-null and not equal to new File("").
final boolean hasLogFile = this.config.getLogFile() != null && !this.config.getLogFile().equals(new File(""));
final boolean hasSecurityLogFile = this.config.getSecurityLogFile() != null && !this.config.getSecurityLogFile().equals(new File(""));
LogService.configureLoggers(hasLogFile, hasSecurityLogFile);
if (hasLogFile || hasSecurityLogFile) {
// main log file
if (hasLogFile) {
// if log-file then create logWriterAppender
this.logWriterAppender = LogWriterAppenders.getOrCreateAppender(LogWriterAppenders.Identifier.MAIN, this.isLoner, this.config, true);
}
// security log file
if (hasSecurityLogFile) {
// if security-log-file then create securityLogWriterAppender
this.securityLogWriterAppender = LogWriterAppenders.getOrCreateAppender(LogWriterAppenders.Identifier.SECURITY, this.isLoner, this.config, false);
} else {
// let security route to regular log-file or stdout
}
}
// Create the main and security log writers only if they have not been created yet.
if (this.logWriter == null) {
this.logWriter = LogWriterFactory.createLogWriterLogger(this.isLoner, false, this.config, true);
this.logWriter.fine("LogWriter is created.");
}
if (this.securityLogWriter == null) {
// LOG: whole new LogWriterLogger instance for security
this.securityLogWriter = LogWriterFactory.createLogWriterLogger(this.isLoner, true, this.config, false);
this.securityLogWriter.fine("SecurityLogWriter is created.");
}
// Hand both log writers to the membership Services class.
Services.setLogWriter(this.logWriter);
Services.setSecurityLogWriter(this.securityLogWriter);
this.clock = new DSClock(this.isLoner);
if (this.attemptingToReconnect && logger.isDebugEnabled()) {
logger.debug("This thread is initializing a new DistributedSystem in order to reconnect to other members");
}
// Remember whether the FORCE_LOCATOR_DM_TYPE system property was set when this system
// was initialized.
// NOTE(review): the original comment here ("bridge server and will need to enforce the
// member limit") looks truncated in this view - confirm the intended wording.
if (Boolean.getBoolean(InternalLocator.FORCE_LOCATOR_DM_TYPE)) {
this.locatorDMTypeForced = true;
}
// Initialize the Diffie-Hellman and public/private keys
// Any failure here is reported as a GemFireSecurityException carrying the original cause.
try {
HandShake.initCertsMap(this.config.getSecurityProps());
HandShake.initPrivateKey(this.config.getSecurityProps());
HandShake.initDHKeys(this.config);
} catch (Exception ex) {
throw new GemFireSecurityException(LocalizedStrings.InternalDistributedSystem_PROBLEM_IN_INITIALIZING_KEYS_FOR_CLIENT_AUTHENTICATION.toLocalizedString(), ex);
}
final long offHeapMemorySize = OffHeapStorage.parseOffHeapMemorySize(getConfig().getOffHeapMemorySize());
this.offHeapStore = OffHeapStorage.createOffHeapStorage(this, offHeapMemorySize, this);
// Note: this can only happen on a linux system
if (getConfig().getLockMemory()) {
// This calculation is not exact, but seems fairly close. So far we have
// not loaded much into the heap and the current RSS usage is already
// included the available memory calculation.
long avail = LinuxProcFsStatistics.getAvailableMemory(logger);
long size = offHeapMemorySize + Runtime.getRuntime().totalMemory();
// Overcommitted: either warn and continue or refuse to start, depending on the flag.
if (avail < size) {
if (ALLOW_MEMORY_LOCK_WHEN_OVERCOMMITTED) {
logger.warn(LocalizedMessage.create(LocalizedStrings.InternalDistributedSystem_MEMORY_OVERCOMMIT_WARN, size - avail));
} else {
throw new IllegalStateException(LocalizedStrings.InternalDistributedSystem_MEMORY_OVERCOMMIT.toLocalizedString(avail, size));
}
}
logger.info("Locking memory. This may take a while...");
GemFireCacheImpl.lockMemory();
logger.info("Finished locking memory.");
}
// An interrupt during locator startup is surfaced as a SystemConnectException.
try {
startInitLocator();
} catch (InterruptedException e) {
throw new SystemConnectException("Startup has been interrupted", e);
}
// Mark the system connected before the distribution manager is created.
synchronized (this.isConnectedMutex) {
this.isConnected = true;
}
if (!this.isLoner) {
try {
// Suspend the quorum checker (if any) while the distribution manager is created.
if (this.quorumChecker != null) {
this.quorumChecker.suspend();
}
this.dm = DistributionManager.create(this);
// fix bug #46324
// If this JVM hosts a locator, register it with the new distribution manager.
if (InternalLocator.hasLocator()) {
InternalLocator locator = InternalLocator.getLocator();
getDistributionManager().addHostedLocators(getDistributedMember(), InternalLocator.getLocatorStrings(), locator.isSharedConfigurationEnabled());
}
} finally {
// dm still null means creation failed: let the quorum checker resume.
if (this.dm == null && this.quorumChecker != null) {
this.quorumChecker.resume();
}
// NOTE(review): setDisconnected() runs on both the success and the failure path;
// isConnected is set to true again below once the channel id has been read - confirm
// this is intended.
setDisconnected();
}
} else {
// Loner: use a distribution manager that does no distribution.
this.dm = new LonerDistributionManager(this, this.logWriter);
}
Assert.assertTrue(this.dm != null);
Assert.assertTrue(this.dm.getSystem() == this);
try {
this.id = this.dm.getChannelId();
} catch (DistributedSystemDisconnectedException e) {
// A disconnect reported while reading the channel id is rethrown as a
// SystemConnectException because we are still starting up.
throw new SystemConnectException(LocalizedStrings.InternalDistributedSystem_DISTRIBUTED_SYSTEM_HAS_DISCONNECTED.toLocalizedString(), e);
}
synchronized (this.isConnectedMutex) {
this.isConnected = true;
}
// When reconnecting and startedLocator is still null, call startInitLocator() again.
if (attemptingToReconnect && (this.startedLocator == null)) {
try {
startInitLocator();
} catch (InterruptedException e) {
throw new SystemConnectException("Startup has been interrupted", e);
}
}
try {
endInitLocator();
} catch (IOException e) {
throw new GemFireIOException("Problem finishing a locator service start", e);
}
if (!statsDisabled) {
// to fix bug 42527 we need a sampler
// even if sampling is not enabled.
this.sampler = new GemFireStatSampler(this);
this.sampler.start();
}
// Tell each appender that was created above that startup has completed.
if (this.logWriterAppender != null) {
LogWriterAppenders.startupComplete(LogWriterAppenders.Identifier.MAIN);
}
if (this.securityLogWriterAppender != null) {
LogWriterAppenders.startupComplete(LogWriterAppenders.Identifier.SECURITY);
}
// this.logger.info("ds created", new RuntimeException("DEBUG: STACK"));
// Log any instantiators that were registered before the log writer
// was created
InternalInstantiator.logInstantiators();
} catch (RuntimeException ex) {
// Startup failed: close the runtime config created above, then propagate the failure.
this.config.close();
throw ex;
}
resourceListeners = new CopyOnWriteArrayList<ResourceEventsListener>();
// Record whether this system was created by a reconnect, then clear the in-progress flag.
this.reconnected = this.attemptingToReconnect;
this.attemptingToReconnect = false;
}
Use of org.apache.geode.SystemConnectException in the Apache Geode project.
Class InternalDistributedSystem, method reconnect.
/**
 * A reconnect is tried when gemfire is configured to reconnect in case of a required role loss
 * or after a forced disconnect. The reconnect will try reconnecting to the distributed system
 * every max-time-out milliseconds for max-number-of-tries configured in the gemfire.properties
 * file. It uses the saved cache XML description to initialize the cache and create regions.
 *
 * <p>The method loops until a new, connected distributed system exists or the reconnect is
 * cancelled. Each iteration disconnects the old system, waits on {@code reconnectLock} for the
 * configured time-out, connects a new system and, for non-admin members, re-creates the cache
 * and its cache servers. When reconnecting for lost roles the loop gives up with a
 * {@link CacheClosedException} after the configured number of tries; when reconnecting after a
 * forced disconnect the number of tries is ignored.
 *
 * <p>This method calls {@code reconnectLock.wait(...)} without synchronizing on the lock itself,
 * so the caller must already hold the monitor of {@code reconnectLock}.
 *
 * <p>The APPEND_TO_LOG_FILE and INHIBIT_DM_BANNER system properties are forced to "true" for the
 * duration of the attempts (if not already set) and restored to their previous state afterwards.
 *
 * @param forcedDisconnect true when reconnecting after a forced disconnect, false when
 *        reconnecting because required roles were lost
 * @param reason the reason passed to {@code disconnect} when shutting down the old system
 * @throws CacheClosedException if, for a lost-roles reconnect, the maximum number of tries has
 *         been reached
 */
private void reconnect(boolean forcedDisconnect, String reason) {
  // Collect all the state for cache
  // Collect all the state for Regions
  // Close the cache,
  // loop trying to connect, waiting before each attempt
  //
  // If reconnecting for lost-roles the reconnected system's cache will decide
  // whether the reconnected system should stay up. After max-tries we will
  // give up.
  //
  // If reconnecting for forced-disconnect we ignore max-tries and keep attempting
  // to join the distributed system until successful
  this.attemptingToReconnect = true;
  InternalDistributedSystem ids = InternalDistributedSystem.getAnyInstance();
  if (ids == null) {
    ids = this;
  }
  // first save the current cache description. This is created by
  // the membership manager when forced-disconnect starts. If we're
  // reconnecting for lost roles then this will be null
  String cacheXML = null;
  List<CacheServerCreation> cacheServerCreation = null;
  InternalCache cache = GemFireCacheImpl.getInstance();
  if (cache != null) {
    cacheXML = cache.getCacheConfig().getCacheXMLDescription();
    cacheServerCreation = cache.getCacheConfig().getCacheServerCreation();
  }
  DistributionConfig oldConfig = ids.getConfig();
  Properties configProps = getProperties();
  int timeOut = oldConfig.getMaxWaitTimeForReconnect();
  int maxTries = oldConfig.getMaxNumReconnectTries();
  final boolean isDebugEnabled = logger.isDebugEnabled();
  if (Thread.currentThread().getName().equals("DisconnectThread")) {
    if (isDebugEnabled) {
      logger.debug("changing thread name to ReconnectThread");
    }
    Thread.currentThread().setName("ReconnectThread");
  }
  // get the membership manager for quorum checks
  MembershipManager mbrMgr = this.dm.getMembershipManager();
  this.quorumChecker = mbrMgr.getQuorumChecker();
  if (logger.isDebugEnabled()) {
    if (quorumChecker == null) {
      logger.debug("No quorum checks will be performed during reconnect attempts");
    } else {
      logger.debug("Initialized quorum checking service: {}", quorumChecker);
    }
  }
  // LOG:CLEANUP: deal with reconnect and INHIBIT_DM_BANNER -- this should be ok now
  // Remember the previous values so the finally block can restore them.
  String appendToLogFile = System.getProperty(APPEND_TO_LOG_FILE);
  if (appendToLogFile == null) {
    System.setProperty(APPEND_TO_LOG_FILE, "true");
  }
  String inhibitBanner = System.getProperty(InternalLocator.INHIBIT_DM_BANNER);
  if (inhibitBanner == null) {
    System.setProperty(InternalLocator.INHIBIT_DM_BANNER, "true");
  }
  if (forcedDisconnect) {
    systemAttemptingReconnect = this;
  }
  try {
    while (this.reconnectDS == null || !this.reconnectDS.isConnected()) {
      if (isReconnectCancelled()) {
        break;
      }
      // The try limit only applies when reconnecting for lost roles.
      if (!forcedDisconnect) {
        if (isDebugEnabled) {
          logger.debug("Max number of tries : {} and max time out : {}", maxTries, timeOut);
        }
        if (reconnectAttemptCounter >= maxTries) {
          if (isDebugEnabled) {
            logger.debug("Stopping the checkrequiredrole thread because reconnect : {} reached the max number of reconnect tries : {}", reconnectAttemptCounter, maxTries);
          }
          throw new CacheClosedException(LocalizedStrings.InternalDistributedSystem_SOME_REQUIRED_ROLES_MISSING.toLocalizedString());
        }
      }
      if (reconnectAttemptCounter == 0) {
        reconnectAttemptTime = System.currentTimeMillis();
      }
      reconnectAttemptCounter++;
      if (isReconnectCancelled()) {
        return;
      }
      logger.info("Disconnecting old DistributedSystem to prepare for a reconnect attempt");
      try {
        disconnect(true, reason, false);
      } catch (Exception ee) {
        // Best effort: a failed disconnect must not stop the reconnect attempt.
        logger.warn("Exception disconnecting for reconnect", ee);
      }
      try {
        // Requires the caller to hold the reconnectLock monitor.
        reconnectLock.wait(timeOut);
      } catch (InterruptedException e) {
        logger.warn(LocalizedMessage.create(LocalizedStrings.InternalDistributedSystem_WAITING_THREAD_FOR_RECONNECT_GOT_INTERRUPTED));
        Thread.currentThread().interrupt();
        return;
      }
      if (isReconnectCancelled()) {
        return;
      }
      logger.info(LocalizedMessage.create(LocalizedStrings.DISTRIBUTED_SYSTEM_RECONNECTING, new Object[] { reconnectAttemptCounter }));
      // Saved so the finally block below can restore the counter after connect().
      int savNumOfTries = reconnectAttemptCounter;
      try {
        // notify listeners of each attempt and then again after successful
        notifyReconnectListeners(this, this.reconnectDS, true);
        if (this.locatorDMTypeForced) {
          System.setProperty(InternalLocator.FORCE_LOCATOR_DM_TYPE, "true");
        }
        configProps.put(DistributionConfig.DS_RECONNECTING_NAME, Boolean.TRUE);
        if (quorumChecker != null) {
          configProps.put(DistributionConfig.DS_QUORUM_CHECKER_NAME, quorumChecker);
        }
        InternalDistributedSystem newDS = null;
        if (isReconnectCancelled()) {
          return;
        }
        try {
          newDS = (InternalDistributedSystem) connect(configProps);
        } catch (CancelException e) {
          if (isReconnectCancelled()) {
            return;
          } else {
            throw e;
          }
        } finally {
          if (newDS == null && quorumChecker != null) {
            // make sure the quorum checker is listening for messages from former members
            quorumChecker.resume();
          }
        }
        if (this.reconnectCancelled) {
          newDS.disconnect();
          continue;
        }
        this.reconnectDS = newDS;
      } catch (SystemConnectException e) {
        logger.debug("Attempt to reconnect failed with SystemConnectException");
        // getMessage() may legitimately be null; an NPE thrown from inside this handler would
        // bypass the generic handler below and abort the whole reconnect.
        String failureMessage = e.getMessage();
        if (failureMessage != null && failureMessage.contains("Rejecting the attempt of a member using an older version")) {
          logger.warn(LocalizedMessage.create(LocalizedStrings.InternalDistributedSystem_EXCEPTION_OCCURRED_WHILE_TRYING_TO_CONNECT_THE_SYSTEM_DURING_RECONNECT), e);
          attemptingToReconnect = false;
          return;
        }
        continue;
      } catch (GemFireConfigException e) {
        if (isDebugEnabled) {
          logger.debug("Attempt to reconnect failed with GemFireConfigException");
        }
        continue;
      } catch (Exception ee) {
        logger.warn(LocalizedMessage.create(LocalizedStrings.InternalDistributedSystem_EXCEPTION_OCCURRED_WHILE_TRYING_TO_CONNECT_THE_SYSTEM_DURING_RECONNECT), ee);
        attemptingToReconnect = false;
        return;
      } finally {
        if (this.locatorDMTypeForced) {
          System.getProperties().remove(InternalLocator.FORCE_LOCATOR_DM_TYPE);
        }
        reconnectAttemptCounter = savNumOfTries;
      }
      DM newDM = this.reconnectDS.getDistributionManager();
      if (newDM instanceof DistributionManager) {
        // Admin-only members have no cache to rebuild.
        if (((DistributionManager) newDM).getDMType() != DistributionManager.ADMIN_ONLY_DM_TYPE) {
          try {
            CacheConfig config = new CacheConfig();
            if (cacheXML != null) {
              config.setCacheXMLDescription(cacheXML);
            }
            cache = GemFireCacheImpl.create(this.reconnectDS, config);
            createAndStartCacheServers(cacheServerCreation, cache);
            if (cache.getCachePerfStats().getReliableRegionsMissing() == 0) {
              reconnectAttemptCounter = 0;
            } else {
              // this try failed. The new cache will call reconnect again
            }
          } catch (CacheXmlException e) {
            // The saved cache description is unusable: give up on reconnecting altogether.
            logger.warn("Exception occurred while trying to create the cache during reconnect", e);
            reconnectDS.disconnect();
            reconnectDS = null;
            reconnectCancelled = true;
            break;
          } catch (CancelException ignor) {
            // Drop the new system; the loop condition will trigger another attempt.
            logger.warn("Exception occurred while trying to create the cache during reconnect", ignor);
            reconnectDS.disconnect();
            reconnectDS = null;
          } catch (Exception e) {
            logger.warn(LocalizedMessage.create(LocalizedStrings.InternalDistributedSystem_EXCEPTION_OCCURRED_WHILE_TRYING_TO_CREATE_THE_CACHE_DURING_RECONNECT), e);
          }
        }
      }
      if (reconnectDS != null && reconnectDS.isConnected()) {
        // make sure the new DS and cache are stable before exiting this loop
        try {
          // "config" here is the field of this system; the CacheConfig local above is out of scope
          Thread.sleep(config.getMemberTimeout() * 3);
        } catch (InterruptedException e) {
          logger.info("Reconnect thread has been interrupted - exiting");
          Thread.currentThread().interrupt();
          return;
        }
      }
    }
    if (isReconnectCancelled()) {
      // The loop can be left via break with no new system (cancelled before the first
      // connect, or after a CacheXmlException nulled it), so guard against null here.
      if (reconnectDS != null) {
        reconnectDS.disconnect();
      }
    } else {
      reconnectDS.isReconnectingDS = false;
      notifyReconnectListeners(this, this.reconnectDS, false);
    }
  } finally {
    systemAttemptingReconnect = null;
    attemptingToReconnect = false;
    // Restore the system properties to the state found on entry.
    if (appendToLogFile == null) {
      System.getProperties().remove(APPEND_TO_LOG_FILE);
    } else {
      System.setProperty(APPEND_TO_LOG_FILE, appendToLogFile);
    }
    if (inhibitBanner == null) {
      System.getProperties().remove(InternalLocator.INHIBIT_DM_BANNER);
    } else {
      System.setProperty(InternalLocator.INHIBIT_DM_BANNER, inhibitBanner);
    }
    if (quorumChecker != null) {
      mbrMgr.releaseQuorumChecker(quorumChecker);
    }
  }
  if (isReconnectCancelled()) {
    logger.debug("reconnect can no longer be done because of an explicit disconnect");
    if (reconnectDS != null) {
      reconnectDS.disconnect();
    }
    attemptingToReconnect = false;
    return;
  } else {
    logger.info("Reconnect completed.\nNew DistributedSystem is {}\nNew Cache is {}", reconnectDS, cache);
  }
}
Use of org.apache.geode.SystemConnectException in the Apache Geode project.
Class DistributionManager, method sendStartupMessage.
/**
 * Announces this member to every other member of the current view and waits for their replies.
 *
 * <p>Before sending, the method publishes this host's equivalent addresses, the unique-zone
 * enforcement flag and the redundancy zone. The {@code cancelOnTimeout} argument is not read by
 * this method.
 *
 * @param startupOperation the operation used to send the startup message
 * @param cancelOnTimeout not used in this method body
 * @return true if at least one startup response was received; false if none arrived or there
 *         are no other members in the view
 * @throws InterruptedException if the calling thread is already interrupted on entry
 * @throws SystemConnectException if the local addresses cannot be determined while other
 *         members exist, if sending fails, if all responses are required and some are missing,
 *         or if no member replied at all
 * @throws IncompatibleSystemException if a rejection message has been recorded
 */
protected boolean sendStartupMessage(StartupOperation startupOperation, boolean cancelOnTimeout) throws InterruptedException {
  if (Thread.interrupted()) {
    throw new InterruptedException();
  }
  this.receivedStartupResponse = false;

  // Be sure to add ourself to the equivalencies list!
  Set localAddresses = StartupMessage.getMyAddresses(this);
  if (localAddresses == null || localAddresses.isEmpty()) {
    // no network interface: fall back to whatever localhost resolves to
    localAddresses = new HashSet();
    try {
      localAddresses.add(SocketCreator.getLocalHost());
    } catch (UnknownHostException e) {
      // can't even get localhost; only fatal when we are not alone in the view
      if (getViewMembers().size() > 1) {
        throw new SystemConnectException("Unable to examine network cards and other members exist");
      }
    }
  }
  setEquivalentHosts(localAddresses);

  setEnforceUniqueZone(getConfig().getEnforceUniqueHost());
  final String zone = getConfig().getRedundancyZone();
  if (zone != null && !zone.isEmpty()) {
    // a configured redundancy zone always turns unique-zone enforcement on
    setEnforceUniqueZone(true);
  }
  setRedundancyZone(getDistributionManagerId(), zone);

  if (logger.isDebugEnabled()) {
    StringBuffer hostList = new StringBuffer("Equivalent IPs for this host: ");
    String separator = "";
    for (Object entry : localAddresses) {
      InetAddress address = (InetAddress) entry;
      hostList.append(separator).append(address.toString());
      separator = ", ";
    }
    logger.debug(hostList);
  }

  // we need to send this to everyone else; even admin vm
  Set peers = new HashSet(getViewMembers());
  peers.remove(getDistributionManagerId());
  if (peers.isEmpty()) {
    // no peers, we are alone.
    return false;
  }

  boolean everyoneReplied;
  try {
    everyoneReplied = startupOperation.sendStartupMessage(peers, STARTUP_TIMEOUT, localAddresses, zone, enforceUniqueZone());
  } catch (Exception re) {
    throw new SystemConnectException(LocalizedStrings.DistributionManager_ONE_OR_MORE_PEERS_GENERATED_EXCEPTIONS_DURING_CONNECTION_ATTEMPT.toLocalizedString(), re);
  }
  if (this.rejectionMessage != null) {
    throw new IncompatibleSystemException(rejectionMessage);
  }

  // Computed exactly as before; the value is not consulted further down.
  boolean isAdminDM = getId().getVmKind() == DistributionManager.ADMIN_ONLY_DM_TYPE || getId().getVmKind() == DistributionManager.LOCATOR_DM_TYPE || DistributionManager.isDedicatedAdminVM || Boolean.getBoolean(InternalLocator.FORCE_LOCATOR_DM_TYPE);
  final boolean receivedAny = this.receivedStartupResponse;
  if (everyoneReplied) {
    return receivedAny;
  }

  // From here on: someone didn't reply.
  synchronized (unfinishedStartupsLock) {
    final int unresponsiveCount = (unfinishedStartups == null) ? 0 : unfinishedStartups.size();
    if (unresponsiveCount != 0 && Boolean.getBoolean("DistributionManager.requireAllStartupResponses")) {
      throw new SystemConnectException(LocalizedStrings.DistributionManager_NO_STARTUP_REPLIES_FROM_0.toLocalizedString(unfinishedStartups));
    }
  }

  // If there are other members, we must receive at least _one_ response
  if (!peers.isEmpty() && !receivedAny) {
    StringBuilder peerNames = new StringBuilder();
    String separator = "";
    for (Object peer : peers) {
      peerNames.append(separator).append(peer.toString());
      separator = ", ";
    }
    if (DEBUG_NO_ACKNOWLEDGEMENTS) {
      printStacks(peers, false);
    }
    throw new SystemConnectException(LocalizedStrings.DistributionManager_RECEIVED_NO_CONNECTION_ACKNOWLEDGMENTS_FROM_ANY_OF_THE_0_SENIOR_CACHE_MEMBERS_1.toLocalizedString(new Object[] { Integer.toString(peers.size()), peerNames.toString() }));
  }

  // If an elder exists but never answered, treat it as having joined anyway.
  final InternalDistributedMember elder = getElderId();
  if (elder != null) {
    boolean elderSilent;
    synchronized (unfinishedStartupsLock) {
      elderSilent = unfinishedStartups != null && unfinishedStartups.contains(elder);
    }
    if (elderSilent) {
      logger.warn(LocalizedMessage.create(LocalizedStrings.DistributionManager_FORCING_AN_ELDER_JOIN_EVENT_SINCE_A_STARTUP_RESPONSE_WAS_NOT_RECEIVED_FROM_ELDER__0_, elder));
      handleManagerStartup(elder);
    }
  }
  return receivedAny;
}
Use of org.apache.geode.SystemConnectException in the Apache Geode project.
Class LocatorDUnitTest, method testNonSSLLocatorDiesWhenConnectingToSSLLocator.
/**
 * Starts a locator in VM 1 with SSL enabled for the locator channel, then tries to start a
 * second locator in VM 2 from the same properties with the SSL-enabled-components entry
 * removed. The first locator must keep seeing exactly one member, and the second locator must
 * be gone within 15 seconds.
 */
@Test
public void testNonSSLLocatorDiesWhenConnectingToSSLLocator() throws Exception {
  IgnoredException.addIgnoredException("Unrecognized SSL message, plaintext connection");
  IgnoredException.addIgnoredException("LocatorCancelException");
  disconnectAllFromDS();

  final Host host = Host.getHost(0);
  final String hostname = NetworkUtils.getServerHostName(host);
  final VM sslLocatorVm = host.getVM(1);
  final VM plainLocatorVm = host.getVM(2);

  final Properties properties = new Properties();
  // general membership settings
  properties.put(MCAST_PORT, "0");
  properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "false");
  properties.put(DISABLE_AUTO_RECONNECT, "true");
  properties.put(MEMBER_TIMEOUT, "2000");
  properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
  properties.put(ENABLE_CLUSTER_CONFIGURATION, "false");
  // SSL settings used by the first locator
  properties.put(SSL_CIPHERS, "any");
  properties.put(SSL_PROTOCOLS, "any");
  properties.put(SSL_KEYSTORE, getSingleKeyKeystore());
  properties.put(SSL_KEYSTORE_PASSWORD, "password");
  properties.put(SSL_KEYSTORE_TYPE, "JKS");
  properties.put(SSL_TRUSTSTORE, getSingleKeyKeystore());
  properties.put(SSL_TRUSTSTORE_PASSWORD, "password");
  properties.put(SSL_REQUIRE_AUTHENTICATION, "true");
  properties.put(SSL_ENABLED_COMPONENTS, SecurableCommunicationChannel.LOCATOR.getConstant());

  try {
    // we set port1 so that the state file gets cleaned up later.
    port1 = sslLocatorVm.invoke(() -> startLocatorWithRandomPort(properties));
    sslLocatorVm.invoke("expect only one member in system", () -> expectSystemToContainThisManyMembers(1));

    // The second locator reuses the properties without the SSL component list and points
    // at the first locator.
    properties.remove(SSL_ENABLED_COMPONENTS);
    properties.put(LOCATORS, hostname + "[" + port1 + "]");

    // we set port2 so that the state file gets cleaned up later.
    port2 = plainLocatorVm.invoke("start Locator2", () -> {
      // Sometimes the LocatorCancelException becomes a SystemConnectException, which then causes
      // an RMIException. This is a normal part of the connect failing.
      try {
        return startLocatorWithRandomPort(properties);
      } catch (SystemConnectException expectedSometimes) {
        return 0;
      }
    });
    sslLocatorVm.invoke("expect only one member in system", () -> expectSystemToContainThisManyMembers(1));
  } finally {
    sslLocatorVm.invoke("stop locator", () -> stopLocator());
    // loc2 should die from inability to connect.
    plainLocatorVm.invoke(() -> Awaitility.await("locator2 dies").atMost(15, TimeUnit.SECONDS).until(() -> Locator.getLocator() == null));
  }
}
Use of org.apache.geode.SystemConnectException in the Apache Geode project.
Class GMSMemberFactory, method newMembershipManager.
/**
 * Creates the membership services for the given configuration, initializes and starts them,
 * and returns their manager as a {@link MembershipManager}.
 *
 * @param listener the membership listener handed to the new services
 * @param config the distribution configuration handed to the new services
 * @param transport the transport configuration handed to the new services
 * @param stats the statistics object handed to the new services
 * @return the manager of the started services
 * @throws DistributionException if starting the services fails with a ConnectionException
 * @throws SystemConnectException if starting fails with any other unexpected
 *         RuntimeException; configuration, connect and security exceptions pass through as-is
 */
public MembershipManager newMembershipManager(DistributedMembershipListener listener, DistributionConfig config, RemoteTransportConfig transport, DMStats stats) throws DistributionException {
  final Services membershipServices = new Services(listener, config, transport, stats);
  try {
    membershipServices.init();
    membershipServices.start();
  } catch (ConnectionException connectFailure) {
    final String message = LocalizedStrings.MemberFactory_UNABLE_TO_CREATE_MEMBERSHIP_MANAGER.toLocalizedString();
    throw new DistributionException(message, connectFailure);
  } catch (GemFireConfigException | SystemConnectException | GemFireSecurityException passThrough) {
    // These already describe the failure precisely; do not wrap them.
    throw passThrough;
  } catch (RuntimeException unexpected) {
    Services.getLogger().error("Unexpected problem starting up membership services", unexpected);
    throw new SystemConnectException("Problem starting up membership services", unexpected);
  }
  final Object manager = membershipServices.getManager();
  return (MembershipManager) manager;
}
Aggregations