Use of org.apache.nifi.cluster.ConnectionException in project nifi by apache.
The class StandardFlowService, method connect.
/**
 * Sends a connection request for this node and waits for a usable response.
 * <p>
 * The request is sent through {@code senderListener}; the whole exchange runs while holding
 * {@code readLock}. The request is attempted up to ten times, or without limit when
 * {@code retryIndefinitely} is {@code true}. Between attempts the thread sleeps for the delay
 * asked for in the last response, for one second when no cluster coordinator is available,
 * or for five seconds after a failure for which no response has been received yet.
 * <p>
 * On success, the node UUID from the response is written to the local
 * {@code CLUSTER_NODE_CONFIG} state before returning; a failure to persist it is logged and
 * does not prevent the response from being returned.
 *
 * @param retryOnCommsFailure whether to keep retrying after an exception other than
 *        {@code NoClusterCoordinatorException} is thrown while requesting the connection
 * @param retryIndefinitely whether to ignore the ten-attempt limit
 * @param dataFlow the flow to include in the connection request
 * @return the successful response, or {@code null} if no response was obtained, the request
 *         was rejected, the thread was interrupted before a success, or the last response
 *         still asked to try again later
 * @throws ConnectionException declared by the signature; nothing in this method body throws it
 *         directly (NOTE(review): presumably raised by a callee, confirm)
 */
private ConnectionResponse connect(final boolean retryOnCommsFailure, final boolean retryIndefinitely, final DataFlow dataFlow) throws ConnectionException {
    readLock.lock();
    try {
        logger.info("Connecting Node: " + nodeId);

        // create connection request message
        final ConnectionRequest request = new ConnectionRequest(nodeId, dataFlow);
        final ConnectionRequestMessage requestMsg = new ConnectionRequestMessage();
        requestMsg.setConnectionRequest(request);

        // send connection request to cluster manager
        /*
         * Try to get a current copy of the cluster's dataflow from the manager
         * up to ten times, sleeping between attempts. Ten times should be
         * enough because the manager will register the node as connecting
         * and therefore, no other changes to the cluster flow can occur.
         *
         * However, the manager needs to obtain a current data flow within
         * maxAttempts * tryLaterSeconds or else the node will fail to startup.
         */
        final int maxAttempts = 10;
        ConnectionResponse response = null;
        for (int i = 0; i < maxAttempts || retryIndefinitely; i++) {
            try {
                response = senderListener.requestConnection(requestMsg).getConnectionResponse();

                if (response.shouldTryLater()) {
                    logger.info("Requested by cluster coordinator to retry connection in " + response.getTryLaterSeconds() + " seconds with explanation: " + response.getRejectionReason());
                    try {
                        // Thread.sleep takes milliseconds; multiply as long so a large delay cannot overflow
                        Thread.sleep(response.getTryLaterSeconds() * 1000L);
                    } catch (final InterruptedException ie) {
                        // we were interrupted, so finish quickly
                        Thread.currentThread().interrupt();
                        break;
                    }
                } else if (response.getRejectionReason() != null) {
                    logger.warn("Connection request was blocked by cluster coordinator with the explanation: " + response.getRejectionReason());
                    // set response to null and treat a firewall blockage the same as getting no response from manager
                    response = null;
                    break;
                } else {
                    // we received a successful connection response from manager
                    break;
                }
            } catch (final NoClusterCoordinatorException ncce) {
                logger.warn("There is currently no Cluster Coordinator. This often happens upon restart of NiFi when running an embedded ZooKeeper. Will register this node " + "to become the active Cluster Coordinator and will attempt to connect to cluster again");
                controller.registerForClusterCoordinator(true);

                try {
                    Thread.sleep(1000L);
                } catch (final InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            } catch (final Exception pe) {
                // could not create a socket and communicate with manager
                logger.warn("Failed to connect to cluster due to: " + pe);
                // the stack trace is only emitted when debug logging is enabled
                if (logger.isDebugEnabled()) {
                    logger.warn("", pe);
                }

                if (retryOnCommsFailure) {
                    try {
                        // 'response' may be left over from an earlier attempt. Its delay is expressed in
                        // seconds, so convert to milliseconds; previously the raw seconds value was
                        // passed to Thread.sleep, which made the retry pause only a few milliseconds.
                        Thread.sleep(response == null ? 5000L : response.getTryLaterSeconds() * 1000L);
                    } catch (final InterruptedException ie) {
                        Thread.currentThread().interrupt();
                        break;
                    }
                } else {
                    break;
                }
            }
        }

        if (response == null) {
            // if response is null, then either we had IO problems or we were blocked by firewall or we couldn't determine manager's address
            return response;
        } else if (response.shouldTryLater()) {
            // if response indicates we should try later, then coordinator was unable to service our request. Just load local flow and move on.
            // when the cluster coordinator is able to service requests, this node's heartbeat will trigger the cluster coordinator to reach
            // out to this node and re-connect to the cluster.
            logger.info("Received a 'try again' response from Cluster Coordinator when attempting to connect to cluster with explanation '" + response.getRejectionReason() + "'. However, the maximum number of retries have already completed. Will load local flow and connect to the cluster when able.");
            return null;
        } else {
            // persist node uuid and index returned by NCM and return the response to the caller
            try {
                // Ensure that we have registered our 'cluster node configuration' state key
                final Map<String, String> map = Collections.singletonMap(NODE_UUID, response.getNodeIdentifier().getId());
                controller.getStateManagerProvider().getStateManager(CLUSTER_NODE_CONFIG).setState(map, Scope.LOCAL);
            } catch (final IOException ioe) {
                logger.warn("Received successful response from Cluster Manager but failed to persist state about the Node's Unique Identifier and the Node's Index. " + "This node may be assigned a different UUID when the node is restarted.", ioe);
            }

            return response;
        }
    } finally {
        readLock.unlock();
    }
}
Use of org.apache.nifi.cluster.ConnectionException in project nifi by apache.
The class StandardFlowService, method loadFromConnectionResponse.
/**
 * Applies a cluster connection response to this node while holding {@code writeLock}.
 * <p>
 * In order, this: resets the coordinator's node statuses when the response carries them,
 * loads the flow from the response, records the assigned node identifier on this service,
 * the controller and the cluster coordinator, marks the coordinator connected, resets the
 * revision manager from the response's component revisions, marks the controller as
 * clustered and CONNECTED, initializes the controller, signals that the flow is initialized,
 * loads the flow's snippets and finally starts heartbeating.
 *
 * @param response the connection response to apply
 * @throws ConnectionException if a {@code FlowSerializationException} or any otherwise
 *         unhandled exception occurs while applying the response
 * @throws UninheritableFlowException rethrown with the connection message prefix
 * @throws MissingBundleException rethrown with the connection message prefix
 * @throws FlowSynchronizationException rethrown with the connection message prefix
 */
private void loadFromConnectionResponse(final ConnectionResponse response) throws ConnectionException {
    writeLock.lock();
    try {
        // only reset the coordinator's view of the cluster when the response supplies statuses
        if (response.getNodeConnectionStatuses() != null) {
            clusterCoordinator.resetNodeStatuses(
                response.getNodeConnectionStatuses()
                    .stream()
                    .collect(Collectors.toMap(nodeStatus -> nodeStatus.getNodeIdentifier(), nodeStatus -> nodeStatus)));
        }

        // pull the flow out of the response and load it as the new controller state
        final DataFlow clusterFlow = response.getDataFlow();
        if (logger.isTraceEnabled()) {
            logger.trace("ResponseFlow = " + new String(clusterFlow.getFlow(), StandardCharsets.UTF_8));
        }
        loadFromBytes(clusterFlow, true);

        // the node ID must be in place before heartbeating begins, since heartbeats need it
        nodeId = response.getNodeIdentifier();
        logger.info("Setting Flow Controller's Node ID: " + nodeId);
        controller.setNodeId(nodeId);
        clusterCoordinator.setLocalNodeIdentifier(nodeId);
        clusterCoordinator.setConnected(true);

        revisionManager.reset(
            response.getComponentRevisions()
                .stream()
                .map(componentRevision -> componentRevision.toRevision())
                .collect(Collectors.toList()));

        // flag the controller as a connected cluster member
        controller.setClustered(true, response.getInstanceId());
        controller.setConnectionStatus(new NodeConnectionStatus(nodeId, NodeConnectionState.CONNECTED));

        // initialization waits until the flow is loaded so no action is taken on repos before everything is good
        initializeController();

        // start the processors as indicated by the dataflow
        controller.onFlowInitialized(autoResumeState);

        loadSnippets(clusterFlow.getSnippets());

        controller.startHeartbeating();
    } catch (final UninheritableFlowException uninheritable) {
        throw new UninheritableFlowException(CONNECTION_EXCEPTION_MSG_PREFIX + "local flow is different than cluster flow.", uninheritable);
    } catch (final MissingBundleException missingBundle) {
        throw new MissingBundleException(CONNECTION_EXCEPTION_MSG_PREFIX + "cluster flow contains bundles that do not exist on the current node", missingBundle);
    } catch (final FlowSerializationException serializationFailure) {
        throw new ConnectionException(CONNECTION_EXCEPTION_MSG_PREFIX + "local or cluster flow is malformed.", serializationFailure);
    } catch (final FlowSynchronizationException syncFailure) {
        throw new FlowSynchronizationException(CONNECTION_EXCEPTION_MSG_PREFIX + "local flow controller partially updated. " + "Administrator should disconnect node and review flow for corruption.", syncFailure);
    } catch (final Exception unexpected) {
        throw new ConnectionException("Failed to connect node to cluster due to: " + unexpected, unexpected);
    } finally {
        writeLock.unlock();
    }
}
Aggregations