Use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.
In the class ODistributedWorker, method onMessage.
/**
 * Executes the remote call on the local node and sends back the result.
 * <p>
 * The sender node name is resolved first (up to 10 attempts, 200 ms apart). If it cannot be resolved, an
 * {@link ODistributedException} is sent back as the response and the request is discarded. Otherwise the task is
 * executed through {@code manager.executeOnLocalNode(...)} and retried every second for as long as the result is an
 * {@link OModificationOperationProhibitedException} and the worker is still running.
 * <p>
 * In every case the database (if open) is rolled back, its local cache is cleared and the original user is restored
 * before the response is sent.
 *
 * @param iRequest the distributed request to execute
 * @throws ODistributedException if the thread is interrupted while waiting for the sender node to be registered
 */
protected void onMessage(final ODistributedRequest iRequest) {
  String senderNodeName = null;
  for (int retry = 0; retry < 10; retry++) {
    senderNodeName = manager.getNodeNameById(iRequest.getId().getNodeId());
    if (senderNodeName != null)
      break;
    try {
      Thread.sleep(200);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new ODistributedException("Execution has been interrupted");
    }
  }
  if (senderNodeName == null) {
    ODistributedServerLog.warn(this, localNodeName, senderNodeName, DIRECTION.IN, "Sender server id %d is not registered in the cluster configuration, discard the request: (%s) (worker=%d)", iRequest.getId().getNodeId(), iRequest, id);
    sendResponseBack(iRequest, new ODistributedException("Sender server id " + iRequest.getId().getNodeId() + " is not registered in the cluster configuration, discard the request"));
    return;
  }
  final ORemoteTask task = iRequest.getTask();
  if (ODistributedServerLog.isDebugEnabled())
    ODistributedServerLog.debug(this, localNodeName, senderNodeName, DIRECTION.IN, "Received request: (%s) (worker=%d)", iRequest, id);
  // EXECUTE IT LOCALLY
  Object responsePayload = null;
  // USER TO RESTORE IN THE FINALLY BLOCK; RESET TO NULL WHEN NO USER SWITCH HAPPENED
  OSecurityUser origin = null;
  try {
    waitNodeIsOnline();
    distributed.waitIsReady(task);
    if (task.isUsingDatabase()) {
      initDatabaseInstance();
      if (database == null)
        throw new ODistributedOperationException("Error on executing remote request because the database '" + databaseName + "' is not available");
    }
    // reset to original user
    if (database != null) {
      database.activateOnCurrentThread();
      origin = database.getUser();
      try {
        if (iRequest.getUserRID() != null && iRequest.getUserRID().isValid() && (lastUser == null || !(lastUser.getIdentity()).equals(iRequest.getUserRID()))) {
          lastUser = database.getMetadata().getSecurity().getUser(iRequest.getUserRID());
          // set to new user
          database.setUser(lastUser);
        } else
          origin = null;
      } catch (Throwable ex) {
        OLogManager.instance().error(this, "Failed on user switching database. " + ex.getMessage());
      }
    }
    // EXECUTE THE TASK
    for (int retry = 1; running; ++retry) {
      responsePayload = manager.executeOnLocalNode(iRequest.getId(), iRequest.getTask(), database);
      if (responsePayload instanceof OModificationOperationProhibitedException) {
        // RETRY
        try {
          ODistributedServerLog.info(this, localNodeName, senderNodeName, DIRECTION.IN, "Database is frozen, waiting and retrying. Request %s (retry=%d, worker=%d)", iRequest, retry, id);
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // RESTORE THE INTERRUPT FLAG AND STOP RETRYING: THE LAST PAYLOAD (THE PROHIBITED-OPERATION ERROR) IS SENT BACK
          // TO THE CALLER INSTEAD OF SILENTLY SWALLOWING THE INTERRUPTION AND LOOPING AGAIN
          Thread.currentThread().interrupt();
          break;
        }
      } else {
        // OPERATION EXECUTED (OK OR ERROR), NO RETRY NEEDED
        if (retry > 1)
          ODistributedServerLog.info(this, localNodeName, senderNodeName, DIRECTION.IN, "Request %s succeed after retry=%d", iRequest, retry);
        break;
      }
    }
  } catch (RuntimeException e) {
    // LET THE SENDER KNOW ABOUT THE FAILURE BEFORE PROPAGATING IT
    sendResponseBack(iRequest, e);
    throw e;
  } finally {
    if (database != null && !database.isClosed()) {
      database.activateOnCurrentThread();
      // CHECKED AGAIN AFTER THE ACTIVATION, AS IN THE ORIGINAL CODE
      if (!database.isClosed()) {
        database.rollback();
        database.getLocalCache().clear();
        if (origin != null)
          database.setUser(origin);
      }
    }
  }
  sendResponseBack(iRequest, responsePayload);
}
Use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.
In the class ODistributedDatabaseImpl, method processRequest.
/**
 * Distributes requests across the available workers by using one queue per worker. This guarantees the sequence of
 * the operations against the same record cluster.
 * <p>
 * Dispatching depends on the partition keys returned by the task:
 * <ul>
 * <li>more than one key, or a single key equal to {@code -1}: the request involves several queues (all of them for
 * {@code -1}). All the involved queues are first synchronized through a latch, then the task is executed on the first
 * queue while the others are kept busy with wait-for tasks;</li>
 * <li>a single key equal to {@code -2}: any worker can be used, preferring an idle one with an empty queue;</li>
 * <li>any other single key: the request goes to the queue selected by that key.</li>
 * </ul>
 * The request is silently discarded when this database is not running.
 *
 * @param request the distributed request to dispatch
 */
public void processRequest(final ODistributedRequest request) {
  if (!running)
    // DISCARD IT
    return;
  final ORemoteTask task = request.getTask();
  waitIsReady(task);
  if (!running)
    // DISCARD IT
    return;
  totalReceivedRequests.incrementAndGet();
  // final ODistributedMomentum lastMomentum = filterByMomentum.get();
  // if (lastMomentum != null && task instanceof OAbstractReplicatedTask) {
  // final OLogSequenceNumber taskLastLSN = ((OAbstractReplicatedTask) task).getLastLSN();
  //
  // final String sourceServer = manager.getNodeNameById(request.getId().getNodeId());
  // final OLogSequenceNumber lastLSNFromMomentum = lastMomentum.getLSN(sourceServer);
  //
  // if (taskLastLSN != null && lastLSNFromMomentum != null && taskLastLSN.compareTo(lastLSNFromMomentum) < 0) {
  // // SKIP REQUEST BECAUSE CONTAINS AN OLD LSN
  // final String msg = String.format("Skipped request %s on database '%s' because %s < current %s", request, databaseName,
  // taskLastLSN, lastLSNFromMomentum);
  // ODistributedServerLog.info(this, localNodeName, null, DIRECTION.NONE, msg);
  // ODistributedWorker.sendResponseBack(this, manager, request, new ODistributedException(msg));
  // return;
  // }
  // }
  final int[] partitionKeys = task.getPartitionKey();
  if (ODistributedServerLog.isDebugEnabled())
    ODistributedServerLog.debug(this, localNodeName, task.getNodeSource(), DIRECTION.IN, "Request %s on database '%s' partitionKeys=%s task=%s", request, databaseName, Arrays.toString(partitionKeys), task);
  if (partitionKeys.length > 1 || partitionKeys[0] == -1) {
    final Set<Integer> involvedWorkerQueues;
    if (partitionKeys.length > 1)
      involvedWorkerQueues = getInvolvedQueuesByPartitionKeys(partitionKeys);
    else
      // LOCK ALL THE QUEUES
      involvedWorkerQueues = ALL_QUEUES;
    if (ODistributedServerLog.isDebugEnabled())
      ODistributedServerLog.debug(this, localNodeName, null, DIRECTION.NONE, "Request %s on database '%s' involvedQueues=%s", request, databaseName, involvedWorkerQueues);
    if (involvedWorkerQueues.size() == 1)
      // JUST ONE QUEUE INVOLVED: PROCESS IT IMMEDIATELY
      processRequest(involvedWorkerQueues.iterator().next(), request);
    else {
      // INVOLVING MULTIPLE QUEUES
      if (ODistributedServerLog.isDebugEnabled())
        ODistributedServerLog.debug(this, localNodeName, null, DIRECTION.NONE, "Request %s on database '%s' waiting for all the previous requests to be completed", request, databaseName);
      // WAIT ALL THE INVOLVED QUEUES ARE FREE AND SYNCHRONIZED
      final CountDownLatch syncLatch = new CountDownLatch(involvedWorkerQueues.size());
      final ODistributedRequest syncRequest = new ODistributedRequest(manager.getTaskFactory(), request.getId().getNodeId(), -1, databaseName, new OSynchronizedTaskWrapper(syncLatch));
      for (int queue : involvedWorkerQueues)
        workerThreads.get(queue).processRequest(syncRequest);
      long taskTimeout = task.getDistributedTimeout();
      try {
        if (taskTimeout <= 0)
          // NO TIMEOUT CONFIGURED: WAIT INDEFINITELY
          syncLatch.await();
        else {
          // WAIT FOR COMPLETION. THE TIMEOUT IS MANAGED IN SMALLER CYCLES TO PROPERLY RECOGNIZE WHEN THE DB IS REMOVED
          final long start = System.currentTimeMillis();
          final long cycleTimeout = Math.min(taskTimeout, 2000);
          boolean locked = false;
          do {
            if (syncLatch.await(cycleTimeout, TimeUnit.MILLISECONDS)) {
              // DONE
              locked = true;
              break;
            }
            if (this.workerThreads.size() == 0)
              // DATABASE WAS SHUTDOWN
              break;
          } while (System.currentTimeMillis() - start < taskTimeout);
          if (!locked) {
            final String msg = String.format("Cannot execute distributed request (%s) because all worker threads (%d) are busy (pending=%d)", request, workerThreads.size(), syncLatch.getCount());
            ODistributedWorker.sendResponseBack(this, manager, request, new ODistributedOperationException(msg));
            return;
          }
        }
      } catch (InterruptedException e) {
        // INTERRUPTED WHILE WAITING FOR THE QUEUES: RESTORE THE INTERRUPT FLAG AND REPORT THE FAILURE TO THE SENDER
        Thread.currentThread().interrupt();
        final String msg = String.format("Cannot execute distributed request (%s) because all worker threads (%d) are busy", request, workerThreads.size());
        ODistributedWorker.sendResponseBack(this, manager, request, new ODistributedOperationException(msg));
        return;
      }
      // PUT THE TASK TO EXECUTE ONLY IN THE FIRST QUEUE AND PUT WAIT-FOR TASKS IN THE OTHERS. WHEN THE REAL TASK IS EXECUTED,
      // ALL THE OTHER TASKS WILL RETURN, SO THE QUEUES WILL BE BUSY DURING THE EXECUTION OF THE TASK. THIS AVOID CONCURRENT
      // EXECUTION FOR THE SAME PARTITION
      final CountDownLatch queueLatch = new CountDownLatch(1);
      int i = 0;
      for (int queue : involvedWorkerQueues) {
        final ODistributedRequest req;
        if (i++ == 0) {
          // USE THE FIRST QUEUE TO PROCESS THE REQUEST
          final String senderNodeName = manager.getNodeNameById(request.getId().getNodeId());
          request.setTask(new OSynchronizedTaskWrapper(queueLatch, senderNodeName, task));
          req = request;
        } else
          req = new ODistributedRequest(manager.getTaskFactory(), request.getId().getNodeId(), -1, databaseName, new OWaitForTask(queueLatch));
        workerThreads.get(queue).processRequest(req);
      }
    }
  } else if (partitionKeys[0] == -2) {
    // HERE partitionKeys HAS EXACTLY ONE ELEMENT: THE MULTI-KEY CASE WAS ALREADY HANDLED BY THE BRANCH ABOVE
    // ANY PARTITION: USE THE FIRST EMPTY IF ANY, OTHERWISE THE FIRST IN THE LIST
    boolean found = false;
    for (ODistributedWorker q : workerThreads) {
      if (q.isWaitingForNextRequest() && q.localQueue.isEmpty()) {
        q.processRequest(request);
        found = true;
        break;
      }
    }
    if (!found)
      // ALL THE THREADS ARE BUSY, SELECT THE FIRST EMPTY ONE
      for (ODistributedWorker q : workerThreads) {
        if (q.localQueue.isEmpty()) {
          q.processRequest(request);
          found = true;
          break;
        }
      }
    if (!found)
      // EXEC ON THE FIRST QUEUE
      workerThreads.get(0).processRequest(request);
  } else {
    processRequest(partitionKeys[0], request);
  }
}
Use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.
In the class OClusterHealthChecker, method checkServerInStall.
/**
 * Sends a heartbeat to the other online servers of every database and sets the database offline on each server that
 * did not reply. The check is skipped when the local node, or the local copy of the database, is not ONLINE.
 */
private void checkServerInStall() {
  final boolean localNodeOnline = manager.getNodeStatus() == ODistributedServerManager.NODE_STATUS.ONLINE;
  if (!localNodeOnline) {
    // ONLY ONLINE NODE CAN CHECK FOR OTHERS
    return;
  }

  for (String database : manager.getMessageService().getDatabases()) {
    final ODistributedServerManager.DB_STATUS localDbStatus = manager.getDatabaseStatus(manager.getLocalNodeName(), database);
    if (localDbStatus != ODistributedServerManager.DB_STATUS.ONLINE) {
      // ONLY ONLINE NODE/DB CAN CHECK FOR OTHERS
      continue;
    }

    // CANDIDATES TO BE SET OFFLINE: EVERY ONLINE SERVER BUT THE LOCAL ONE
    final List<String> silentServers = manager.getOnlineNodes(database);
    silentServers.remove(manager.getLocalNodeName());
    if (silentServers.isEmpty()) {
      continue;
    }

    if (ODistributedServerLog.isDebugEnabled()) {
      ODistributedServerLog.debug(this, manager.getLocalNodeName(), silentServers.toString(), ODistributedServerLog.DIRECTION.OUT, "Sending heartbeat message to servers (db=%s)", database);
    }

    try {
      final ODistributedResponse heartbeatReply = manager.sendRequest(database, null, silentServers, new OHeartbeatTask(), manager.getNextMessageIdCounter(), ODistributedRequest.EXECUTION_MODE.RESPONSE, null, null);

      Object replyPayload = null;
      if (heartbeatReply != null) {
        replyPayload = heartbeatReply.getPayload();
      }

      if (replyPayload instanceof Map) {
        // THE SERVERS THAT ANSWERED ARE ALIVE: DROP THEM FROM THE CANDIDATES
        silentServers.removeAll(((Map<?, ?>) replyPayload).keySet());
      }
    } catch (ODistributedOperationException e) {
      // NO SERVER RESPONDED, THE SERVER COULD BE ISOLATED: SET ALL THE SERVER AS OFFLINE
    }

    for (String silentServer : silentServers) {
      setDatabaseOffline(database, silentServer);
    }
  }
}
Use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.
In the class OClusterHealthChecker, method checkServerConfig.
/**
 * Looks for misconfigured databases: for every database, if an active server has the database available but is not
 * listed in the local distributed configuration, the configuration is requested from all the servers where the
 * database is available, and the local one is overwritten when a server reports a higher version.
 * <p>
 * Responses that are not documents, or that carry no {@code version} field, are ignored.
 */
private void checkServerConfig() {
  // NO NODES CONFIGURED: CHECK IF THERE IS ANY MISCONFIGURATION BY CHECKING THE DATABASE STATUSES
  for (String databaseName : manager.getMessageService().getDatabases()) {
    final ODistributedConfiguration cfg = manager.getDatabaseConfiguration(databaseName);
    final Set<String> confServers = cfg.getServers(null);
    for (String s : manager.getActiveServers()) {
      if (manager.isNodeAvailable(s, databaseName) && !confServers.contains(s)) {
        final List<String> nodes = new ArrayList<String>();
        for (String n : manager.getActiveServers()) {
          if (manager.isNodeAvailable(n, databaseName))
            nodes.add(n);
        }
        // THE SERVERS HAS THE DATABASE ONLINE BUT IT IS NOT IN THE CFG. DETERMINE THE MOST UPD CFG
        try {
          final ODistributedResponse response = manager.sendRequest(databaseName, null, nodes, new ORequestDatabaseConfigurationTask(databaseName), manager.getNextMessageIdCounter(), ODistributedRequest.EXECUTION_MODE.RESPONSE, null, null);
          final Object payload = response != null ? response.getPayload() : null;
          if (payload instanceof Map) {
            ODocument mostUpdatedCfg = null;
            int mostUpdatedServerVersion = -1;
            final Map<String, Object> responses = (Map<String, Object>) payload;
            for (Map.Entry<String, Object> r : responses.entrySet()) {
              if (r.getValue() instanceof ODocument) {
                final ODocument doc = (ODocument) r.getValue();
                // READ AS Integer: UNBOXING A MISSING FIELD DIRECTLY INTO AN int WOULD THROW A NullPointerException
                final Integer v = doc.field("version");
                if (v != null && v > mostUpdatedServerVersion) {
                  mostUpdatedServerVersion = v;
                  mostUpdatedCfg = doc;
                }
              }
            }
            if (mostUpdatedCfg != null && cfg.getVersion() < mostUpdatedServerVersion) {
              // OVERWRITE DB VERSION
              ((ODistributedStorage) manager.getStorage(databaseName)).setDistributedConfiguration(new OModifiableDistributedConfiguration(mostUpdatedCfg));
            }
          }
        } catch (ODistributedOperationException e) {
          // NO SERVER RESPONDED: NOTHING TO COMPARE WITH, THE LOCAL CONFIGURATION IS LEFT UNTOUCHED
        }
      }
    }
  }
}
Use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.
In the class ODistributedResponseManager, method getFinalResponse.
/**
 * Builds the final response for the request, while holding {@code synchronousResponsesLock}.
 * <p>
 * A failure reported by {@code manageConflicts()} is returned wrapped in a local response. With no response received
 * at all, {@code null} is returned unless a quorum is required for a non-idempotent task, in which case an exception
 * is thrown. Otherwise the result depends on the task's result strategy: UNION merges every received payload in a
 * map keyed by node name, any other strategy returns the first response of the best responses group.
 *
 * @return the final response, or {@code null} when nothing was received and no quorum is required (or when the UNION
 *         strategy collected no payload)
 * @throws ODistributedOperationException if no response was received, the quorum is greater than zero and the task is
 *                                        not idempotent
 */
public ODistributedResponse getFinalResponse() {
  synchronousResponsesLock.lock();
  try {
    final RuntimeException conflictFailure = manageConflicts();
    if (conflictFailure != null) {
      return new ODistributedResponse(request.getId(), dManager.getLocalNodeName(), dManager.getLocalNodeName(), conflictFailure);
    }

    if (receivedResponses == 0) {
      final boolean quorumRequired = quorum > 0 && !request.getTask().isIdempotent();
      if (!quorumRequired) {
        // NO QUORUM, RETURN NULL
        return null;
      }
      throw new ODistributedOperationException("No response received from any of nodes " + getExpectedNodes() + " for request " + request + " after " + ((System.nanoTime() - sentOn) / 1000000) + "ms");
    }

    // MANAGE THE RESULT BASED ON RESULT STRATEGY
    switch (request.getTask().getResultStrategy()) {
      case UNION: {
        // COLLECT ALL THE RESPONSE IN A MAP OF <NODE, RESULT>
        final Map<String, Object> unionPayload = new HashMap<String, Object>();
        for (Map.Entry<String, Object> nodeEntry : responses.entrySet()) {
          if (nodeEntry.getValue() == NO_RESPONSE) {
            continue;
          }
          unionPayload.put(nodeEntry.getKey(), ((ODistributedResponse) nodeEntry.getValue()).getPayload());
        }

        if (unionPayload.isEmpty()) {
          return null;
        }

        // REUSE THE FIRST RECEIVED RESPONSE AS CONTAINER OF THE MERGED PAYLOAD
        final ODistributedResponse merged = (ODistributedResponse) getReceivedResponses().iterator().next();
        merged.setExecutorNodeName(responses.keySet().toString());
        merged.setPayload(unionPayload);
        return merged;
      }

      case ANY:
        // DEFAULT: RETURN BEST ANSWER
        break;
    }

    final int bestGroup = getBestResponsesGroup();
    final List<ODistributedResponse> winners = responseGroups.get(bestGroup);
    return winners.get(0);
  } finally {
    synchronousResponsesLock.unlock();
  }
}
Aggregations