Search in sources :

Example 6 with ODistributedOperationException

use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.

the class ODistributedWorker method onMessage.

/**
 * Executes the remote call on the local node and sends the result back to the requester.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>Resolves the sender node name from the request id, polling up to 10 times with a 200 ms pause. If it cannot be
 * resolved, a warning is logged, an {@code ODistributedException} is sent back and the request is discarded.</li>
 * <li>Waits for the node to be online and for the distributed database to be ready for the task.</li>
 * <li>If the task uses the database, opens it and impersonates the user referenced by the request (when it differs
 * from the last impersonated one).</li>
 * <li>Executes the task, retrying once per second for as long as the result is an
 * {@code OModificationOperationProhibitedException} (database frozen) and this worker is running.</li>
 * <li>Always rolls back, clears the local cache and restores the original user before sending the response.</li>
 * </ol>
 *
 * @param iRequest the distributed request to execute
 * @throws RuntimeException any runtime failure raised while preparing or executing the task; it is first sent back to
 *           the requester as the response payload and then rethrown
 */
protected void onMessage(final ODistributedRequest iRequest) {
    String senderNodeName = null;
    for (int retry = 0; retry < 10; retry++) {
        senderNodeName = manager.getNodeNameById(iRequest.getId().getNodeId());
        if (senderNodeName != null)
            break;
        try {
            Thread.sleep(200);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new ODistributedException("Execution has been interrupted");
        }
    }
    if (senderNodeName == null) {
        ODistributedServerLog.warn(this, localNodeName, senderNodeName, DIRECTION.IN, "Sender server id %d is not registered in the cluster configuration, discard the request: (%s) (worker=%d)", iRequest.getId().getNodeId(), iRequest, id);
        sendResponseBack(iRequest, new ODistributedException("Sender server id " + iRequest.getId().getNodeId() + " is not registered in the cluster configuration, discard the request"));
        return;
    }
    final ORemoteTask task = iRequest.getTask();
    if (ODistributedServerLog.isDebugEnabled())
        ODistributedServerLog.debug(this, localNodeName, senderNodeName, DIRECTION.IN, "Received request: (%s) (worker=%d)", iRequest, id);
    // EXECUTE IT LOCALLY
    Object responsePayload = null;
    // USER TO RESTORE IN THE FINALLY BLOCK; STAYS NULL WHEN NO USER SWITCH WAS NEEDED
    OSecurityUser origin = null;
    try {
        waitNodeIsOnline();
        distributed.waitIsReady(task);
        if (task.isUsingDatabase()) {
            initDatabaseInstance();
            if (database == null)
                throw new ODistributedOperationException("Error on executing remote request because the database '" + databaseName + "' is not available");
        }
        // IMPERSONATE THE USER OF THE REQUEST, REMEMBERING THE CURRENT ONE TO RESTORE IT AFTERWARDS
        if (database != null) {
            database.activateOnCurrentThread();
            origin = database.getUser();
            try {
                if (iRequest.getUserRID() != null && iRequest.getUserRID().isValid() && (lastUser == null || !(lastUser.getIdentity()).equals(iRequest.getUserRID()))) {
                    lastUser = database.getMetadata().getSecurity().getUser(iRequest.getUserRID());
                    // set to new user
                    database.setUser(lastUser);
                } else
                    // NO SWITCH DONE: NOTHING TO RESTORE
                    origin = null;
            } catch (Throwable ex) {
                // BEST EFFORT: THE TASK IS STILL EXECUTED. THE THROWABLE IS PASSED TO THE LOGGER TO KEEP THE STACK TRACE
                OLogManager.instance().error(this, "Failed on user switching database. " + ex.getMessage(), ex);
            }
        }
        // EXECUTE THE TASK
        for (int retry = 1; running; ++retry) {
            responsePayload = manager.executeOnLocalNode(iRequest.getId(), iRequest.getTask(), database);
            if (responsePayload instanceof OModificationOperationProhibitedException) {
                // RETRY
                try {
                    ODistributedServerLog.info(this, localNodeName, senderNodeName, DIRECTION.IN, "Database is frozen, waiting and retrying. Request %s (retry=%d, worker=%d)", iRequest, retry, id);
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // DO NOT SWALLOW THE INTERRUPTION: RESTORE THE FLAG FOR THE CALLER AND STOP RETRYING. THE LAST RESULT
                    // (DATABASE FROZEN) IS SENT BACK AS THE RESPONSE
                    Thread.currentThread().interrupt();
                    break;
                }
            } else {
                // OPERATION EXECUTED (OK OR ERROR), NO RETRY NEEDED
                if (retry > 1)
                    ODistributedServerLog.info(this, localNodeName, senderNodeName, DIRECTION.IN, "Request %s succeed after retry=%d", iRequest, retry);
                break;
            }
        }
    } catch (RuntimeException e) {
        // LET THE REQUESTER KNOW ABOUT THE FAILURE BEFORE PROPAGATING IT
        sendResponseBack(iRequest, e);
        throw e;
    } finally {
        if (database != null && !database.isClosed()) {
            database.activateOnCurrentThread();
            // CHECKED AGAIN AFTER THE ACTIVATION ON THE CURRENT THREAD
            if (!database.isClosed()) {
                // LEAVE THE SHARED DATABASE INSTANCE CLEAN FOR THE NEXT REQUEST
                database.rollback();
                database.getLocalCache().clear();
                if (origin != null)
                    database.setUser(origin);
            }
        }
    }
    sendResponseBack(iRequest, responsePayload);
}
Also used : ORemoteTask(com.orientechnologies.orient.server.distributed.task.ORemoteTask) ODistributedOperationException(com.orientechnologies.orient.server.distributed.task.ODistributedOperationException) OModificationOperationProhibitedException(com.orientechnologies.common.concur.lock.OModificationOperationProhibitedException) OSecurityUser(com.orientechnologies.orient.core.metadata.security.OSecurityUser)

Example 7 with ODistributedOperationException

use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.

the class ODistributedDatabaseImpl method processRequest.

/**
 * Distributes the request to the available workers by using one queue per worker. This guarantees the sequence of the
 * operations against the same record cluster.
 * <p>
 * The routing depends on the partition keys returned by the task:
 * <ul>
 * <li>more than one key, or the single key {@code -1}: the involved queues (all of them for {@code -1}) are first
 * synchronized with a latch, then the task runs on the first queue while the others are kept busy by wait-for
 * tasks;</li>
 * <li>the single key {@code -2}: any queue can be used, preferring an idle worker with an empty queue, then any empty
 * queue, then the first queue;</li>
 * <li>any other single key: the request goes straight to the queue selected by that key.</li>
 * </ul>
 * If the involved queues cannot be synchronized within the task timeout, or the waiting thread is interrupted, an
 * {@code ODistributedOperationException} is sent back as the response and the request is not executed.
 * <p>
 * NOTE(review): assumes {@code task.getPartitionKey()} never returns an empty array; {@code partitionKeys[0]} would
 * throw otherwise. Confirm against the task implementations.
 *
 * @param request the distributed request to dispatch; it is silently discarded when the database is not running
 */
public void processRequest(final ODistributedRequest request) {
    if (!running)
        // DISCARD IT
        return;
    final ORemoteTask task = request.getTask();
    waitIsReady(task);
    if (!running)
        // DISCARD IT
        return;
    totalReceivedRequests.incrementAndGet();
    // final ODistributedMomentum lastMomentum = filterByMomentum.get();
    // if (lastMomentum != null && task instanceof OAbstractReplicatedTask) {
    // final OLogSequenceNumber taskLastLSN = ((OAbstractReplicatedTask) task).getLastLSN();
    //
    // final String sourceServer = manager.getNodeNameById(request.getId().getNodeId());
    // final OLogSequenceNumber lastLSNFromMomentum = lastMomentum.getLSN(sourceServer);
    //
    // if (taskLastLSN != null && lastLSNFromMomentum != null && taskLastLSN.compareTo(lastLSNFromMomentum) < 0) {
    // // SKIP REQUEST BECAUSE CONTAINS AN OLD LSN
    // final String msg = String.format("Skipped request %s on database '%s' because %s < current %s", request, databaseName,
    // taskLastLSN, lastLSNFromMomentum);
    // ODistributedServerLog.info(this, localNodeName, null, DIRECTION.NONE, msg);
    // ODistributedWorker.sendResponseBack(this, manager, request, new ODistributedException(msg));
    // return;
    // }
    // }
    final int[] partitionKeys = task.getPartitionKey();
    if (ODistributedServerLog.isDebugEnabled())
        ODistributedServerLog.debug(this, localNodeName, task.getNodeSource(), DIRECTION.IN, "Request %s on database '%s' partitionKeys=%s task=%s", request, databaseName, Arrays.toString(partitionKeys), task);
    if (partitionKeys.length > 1 || partitionKeys[0] == -1) {
        final Set<Integer> involvedWorkerQueues;
        if (partitionKeys.length > 1)
            involvedWorkerQueues = getInvolvedQueuesByPartitionKeys(partitionKeys);
        else
            // LOCK ALL THE QUEUES
            involvedWorkerQueues = ALL_QUEUES;
        ODistributedServerLog.debug(this, localNodeName, null, DIRECTION.NONE, "Request %s on database '%s' involvedQueues=%s", request, databaseName, involvedWorkerQueues);
        if (involvedWorkerQueues.size() == 1)
            // JUST ONE QUEUE INVOLVED: PROCESS IT IMMEDIATELY
            processRequest(involvedWorkerQueues.iterator().next(), request);
        else {
            // INVOLVING MULTIPLE QUEUES
            ODistributedServerLog.debug(this, localNodeName, null, DIRECTION.NONE, "Request %s on database '%s' waiting for all the previous requests to be completed", request, databaseName);
            // WAIT ALL THE INVOLVED QUEUES ARE FREE AND SYNCHRONIZED
            final CountDownLatch syncLatch = new CountDownLatch(involvedWorkerQueues.size());
            final ODistributedRequest syncRequest = new ODistributedRequest(manager.getTaskFactory(), request.getId().getNodeId(), -1, databaseName, new OSynchronizedTaskWrapper(syncLatch));
            for (int queue : involvedWorkerQueues) workerThreads.get(queue).processRequest(syncRequest);
            final long taskTimeout = task.getDistributedTimeout();
            try {
                if (taskTimeout <= 0)
                    // NO TIMEOUT CONFIGURED: WAIT INDEFINITELY
                    syncLatch.await();
                else {
                    // WAIT FOR COMPLETION. THE TIMEOUT IS MANAGED IN SMALLER CYCLES TO PROPERLY RECOGNIZE WHEN THE DB IS REMOVED.
                    // A MONOTONIC DEADLINE IS USED AND EVERY CYCLE IS CLAMPED TO THE REMAINING TIME, SO THE TOTAL WAIT NEVER
                    // EXCEEDS THE TASK TIMEOUT AND IS NOT AFFECTED BY WALL-CLOCK ADJUSTMENTS
                    final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(taskTimeout);
                    final long cycleTimeout = TimeUnit.MILLISECONDS.toNanos(Math.min(taskTimeout, 2000));
                    boolean locked = false;
                    long remaining;
                    while ((remaining = deadline - System.nanoTime()) > 0) {
                        if (syncLatch.await(Math.min(remaining, cycleTimeout), TimeUnit.NANOSECONDS)) {
                            // DONE
                            locked = true;
                            break;
                        }
                        if (this.workerThreads.isEmpty())
                            // DATABASE WAS SHUTDOWN
                            break;
                    }
                    if (!locked) {
                        final String msg = String.format("Cannot execute distributed request (%s) because all worker threads (%d) are busy (pending=%d)", request, workerThreads.size(), syncLatch.getCount());
                        ODistributedWorker.sendResponseBack(this, manager, request, new ODistributedOperationException(msg));
                        return;
                    }
                }
            } catch (InterruptedException e) {
                // INTERRUPTED WHILE WAITING FOR THE QUEUES: RESTORE THE INTERRUPT FLAG, REPORT THE FAILURE TO THE REQUESTER AND
                // GIVE UP
                Thread.currentThread().interrupt();
                final String msg = String.format("Cannot execute distributed request (%s) because all worker threads (%d) are busy", request, workerThreads.size());
                ODistributedWorker.sendResponseBack(this, manager, request, new ODistributedOperationException(msg));
                return;
            }
            // PUT THE TASK TO EXECUTE ONLY IN THE FIRST QUEUE AND PUT WAIT-FOR TASKS IN THE OTHERS. WHEN THE REAL TASK IS EXECUTED,
            // ALL THE OTHER TASKS WILL RETURN, SO THE QUEUES WILL BE BUSY DURING THE EXECUTION OF THE TASK. THIS AVOID CONCURRENT
            // EXECUTION FOR THE SAME PARTITION
            final CountDownLatch queueLatch = new CountDownLatch(1);
            int i = 0;
            for (int queue : involvedWorkerQueues) {
                final ODistributedRequest req;
                if (i++ == 0) {
                    // USE THE FIRST QUEUE TO PROCESS THE REQUEST
                    final String senderNodeName = manager.getNodeNameById(request.getId().getNodeId());
                    request.setTask(new OSynchronizedTaskWrapper(queueLatch, senderNodeName, task));
                    req = request;
                } else
                    req = new ODistributedRequest(manager.getTaskFactory(), request.getId().getNodeId(), -1, databaseName, new OWaitForTask(queueLatch));
                workerThreads.get(queue).processRequest(req);
            }
        }
    } else if (partitionKeys[0] == -2) {
        // ANY PARTITION: USE THE FIRST EMPTY IF ANY, OTHERWISE THE FIRST IN THE LIST.
        // (THE MULTI-KEY CASE IS ALREADY HANDLED BY THE BRANCH ABOVE, SO ONLY THE SINGLE KEY IS CHECKED HERE)
        boolean found = false;
        for (ODistributedWorker q : workerThreads) {
            if (q.isWaitingForNextRequest() && q.localQueue.isEmpty()) {
                q.processRequest(request);
                found = true;
                break;
            }
        }
        if (!found)
            // ALL THE THREADS ARE BUSY, SELECT THE FIRST EMPTY ONE
            for (ODistributedWorker q : workerThreads) {
                if (q.localQueue.isEmpty()) {
                    q.processRequest(request);
                    found = true;
                    break;
                }
            }
        if (!found)
            // EXEC ON THE FIRST QUEUE
            workerThreads.get(0).processRequest(request);
    } else {
        // SINGLE EXPLICIT PARTITION: ROUTE TO ITS QUEUE
        processRequest(partitionKeys[0], request);
    }
}
Also used : ORemoteTask(com.orientechnologies.orient.server.distributed.task.ORemoteTask) CountDownLatch(java.util.concurrent.CountDownLatch) ODistributedOperationException(com.orientechnologies.orient.server.distributed.task.ODistributedOperationException) OWaitForTask(com.orientechnologies.orient.server.distributed.impl.task.OWaitForTask)

Example 8 with ODistributedOperationException

use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.

the class OClusterHealthChecker method checkServerInStall.

/**
 * Sends a heartbeat to every other online server of each database that is online on the local node, and marks as
 * offline the servers that did not answer.
 * <p>
 * Nothing is done when the local node is not ONLINE. A database is skipped when its local status is not ONLINE or
 * when no other server is online for it.
 */
private void checkServerInStall() {
    final boolean localNodeOnline = manager.getNodeStatus() == ODistributedServerManager.NODE_STATUS.ONLINE;
    if (!localNodeOnline) {
        // ONLY ONLINE NODE CAN CHECK FOR OTHERS
        return;
    }
    for (final String database : manager.getMessageService().getDatabases()) {
        if (manager.getDatabaseStatus(manager.getLocalNodeName(), database) != ODistributedServerManager.DB_STATUS.ONLINE) {
            // ONLY ONLINE NODE/DB CAN CHECK FOR OTHERS
            continue;
        }
        // STARTS AS "ALL THE OTHER ONLINE SERVERS"; THE ONES THAT ANSWER THE HEARTBEAT ARE REMOVED BELOW
        final List<String> silentServers = manager.getOnlineNodes(database);
        silentServers.remove(manager.getLocalNodeName());
        if (silentServers.isEmpty()) {
            continue;
        }
        if (ODistributedServerLog.isDebugEnabled()) {
            ODistributedServerLog.debug(this, manager.getLocalNodeName(), silentServers.toString(), ODistributedServerLog.DIRECTION.OUT, "Sending heartbeat message to servers (db=%s)", database);
        }
        try {
            final ODistributedResponse heartbeatReply = manager.sendRequest(database, null, silentServers, new OHeartbeatTask(), manager.getNextMessageIdCounter(), ODistributedRequest.EXECUTION_MODE.RESPONSE, null, null);
            Object answers = null;
            if (heartbeatReply != null) {
                answers = heartbeatReply.getPayload();
            }
            if (answers instanceof Map) {
                // THE KEYS OF THE PAYLOAD ARE THE SERVERS THAT RESPONDED
                silentServers.removeAll(((Map<?, ?>) answers).keySet());
            }
        } catch (ODistributedOperationException e) {
            // NO SERVER RESPONDED, THE SERVER COULD BE ISOLATED: THE LIST IS LEFT UNTOUCHED SO ALL THE SERVERS ARE SET AS OFFLINE
        }
        for (final String silent : silentServers) {
            setDatabaseOffline(database, silent);
        }
    }
}
Also used : OHeartbeatTask(com.orientechnologies.orient.server.distributed.impl.task.OHeartbeatTask) ODistributedOperationException(com.orientechnologies.orient.server.distributed.task.ODistributedOperationException)

Example 9 with ODistributedOperationException

use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.

the class OClusterHealthChecker method checkServerConfig.

/**
 * Looks for servers that have a database available but are missing from that database's distributed configuration.
 * When one is found, the configuration is requested from all the servers where the database is available and, if
 * any of them reports a higher version than the local one, the local distributed configuration is overwritten with
 * the most recent one.
 * <p>
 * Responses that are not documents, or whose {@code version} field is missing or not numeric, are ignored.
 */
private void checkServerConfig() {
    // CHECK IF THERE IS ANY MISCONFIGURATION BY COMPARING THE DATABASE STATUSES WITH THE CONFIGURED SERVERS
    for (String databaseName : manager.getMessageService().getDatabases()) {
        final ODistributedConfiguration cfg = manager.getDatabaseConfiguration(databaseName);
        final Set<String> confServers = cfg.getServers(null);
        for (String s : manager.getActiveServers()) {
            if (manager.isNodeAvailable(s, databaseName) && !confServers.contains(s)) {
                final List<String> nodes = new ArrayList<String>();
                for (String n : manager.getActiveServers()) {
                    if (manager.isNodeAvailable(n, databaseName))
                        nodes.add(n);
                }
                // THE SERVERS HAS THE DATABASE ONLINE BUT IT IS NOT IN THE CFG. DETERMINE THE MOST UPD CFG
                try {
                    final ODistributedResponse response = manager.sendRequest(databaseName, null, nodes, new ORequestDatabaseConfigurationTask(databaseName), manager.getNextMessageIdCounter(), ODistributedRequest.EXECUTION_MODE.RESPONSE, null, null);
                    final Object payload = response != null ? response.getPayload() : null;
                    if (payload instanceof Map) {
                        ODocument mostUpdatedCfg = null;
                        int mostUpdatedServerVersion = -1;
                        for (Object serverResponse : ((Map<?, ?>) payload).values()) {
                            if (serverResponse instanceof ODocument) {
                                final ODocument doc = (ODocument) serverResponse;
                                // READ AS OBJECT: A MISSING OR NON-NUMERIC VERSION MUST NOT BREAK THE CHECK WITH A NPE/CCE
                                final Object version = doc.field("version");
                                if (version instanceof Number && ((Number) version).intValue() > mostUpdatedServerVersion) {
                                    mostUpdatedServerVersion = ((Number) version).intValue();
                                    mostUpdatedCfg = doc;
                                }
                            }
                        }
                        if (mostUpdatedCfg != null && cfg.getVersion() < mostUpdatedServerVersion) {
                            // OVERWRITE DB VERSION
                            ((ODistributedStorage) manager.getStorage(databaseName)).setDistributedConfiguration(new OModifiableDistributedConfiguration(mostUpdatedCfg));
                        }
                    }
                } catch (ODistributedOperationException ignored) {
                    // NO SERVER RESPONDED: THERE IS NO CONFIGURATION TO COMPARE WITH, SO THE LOCAL ONE IS LEFT UNTOUCHED
                }
            }
        }
    }
}
Also used : ODistributedOperationException(com.orientechnologies.orient.server.distributed.task.ODistributedOperationException) ORequestDatabaseConfigurationTask(com.orientechnologies.orient.server.distributed.impl.task.ORequestDatabaseConfigurationTask) ODocument(com.orientechnologies.orient.core.record.impl.ODocument)

Example 10 with ODistributedOperationException

use of com.orientechnologies.orient.server.distributed.task.ODistributedOperationException in project orientdb by orientechnologies.

the class ODistributedResponseManager method getFinalResponse.

/**
 * Builds the response to return to the caller once the collection of the responses is over. The whole computation
 * runs while holding {@code synchronousResponsesLock}.
 * <ul>
 * <li>If {@code manageConflicts()} reports a failure, a response wrapping that failure is returned.</li>
 * <li>If no response was received, {@code null} is returned, unless a quorum is required and the task is not
 * idempotent, in which case an exception is thrown.</li>
 * <li>With the UNION result strategy, the payloads of all the received responses are merged into a map keyed by
 * node name ({@code null} when there is none).</li>
 * <li>In every other case the first response of the best responses group is returned.</li>
 * </ul>
 *
 * @return the final response, or {@code null} when there is nothing to return
 * @throws ODistributedOperationException if no response was received, the quorum is greater than zero and the task
 *           is not idempotent
 */
public ODistributedResponse getFinalResponse() {
    synchronousResponsesLock.lock();
    try {
        final RuntimeException conflictFailure = manageConflicts();
        if (conflictFailure != null) {
            return new ODistributedResponse(request.getId(), dManager.getLocalNodeName(), dManager.getLocalNodeName(), conflictFailure);
        }

        if (receivedResponses == 0) {
            final boolean responseMandatory = quorum > 0 && !request.getTask().isIdempotent();
            if (!responseMandatory) {
                // NO QUORUM, RETURN NULL
                return null;
            }
            throw new ODistributedOperationException("No response received from any of nodes " + getExpectedNodes() + " for request " + request + " after " + ((System.nanoTime() - sentOn) / 1000000) + "ms");
        }

        // MANAGE THE RESULT BASED ON RESULT STRATEGY
        switch (request.getTask().getResultStrategy()) {
            case UNION: {
                // COLLECT ALL THE RESPONSE IN A MAP OF <NODE, RESULT>
                final Map<String, Object> payloadByNode = new HashMap<String, Object>();
                for (Map.Entry<String, Object> nodeEntry : responses.entrySet()) {
                    final Object nodeResponse = nodeEntry.getValue();
                    if (nodeResponse != NO_RESPONSE) {
                        payloadByNode.put(nodeEntry.getKey(), ((ODistributedResponse) nodeResponse).getPayload());
                    }
                }
                if (payloadByNode.isEmpty()) {
                    return null;
                }
                // REUSE THE FIRST RECEIVED RESPONSE AS THE CONTAINER OF THE MERGED RESULT
                final ODistributedResponse merged = (ODistributedResponse) getReceivedResponses().iterator().next();
                merged.setExecutorNodeName(responses.keySet().toString());
                merged.setPayload(payloadByNode);
                return merged;
            }
            case ANY:
            default:
                // DEFAULT: RETURN BEST ANSWER
                break;
        }

        final int winnerIndex = getBestResponsesGroup();
        final List<ODistributedResponse> winnerGroup = responseGroups.get(winnerIndex);
        return winnerGroup.get(0);
    } finally {
        synchronousResponsesLock.unlock();
    }
}
Also used : ODistributedOperationException(com.orientechnologies.orient.server.distributed.task.ODistributedOperationException)

Aggregations

ODistributedOperationException (com.orientechnologies.orient.server.distributed.task.ODistributedOperationException)10 ORemoteTask (com.orientechnologies.orient.server.distributed.task.ORemoteTask)2 OLockException (com.orientechnologies.common.concur.lock.OLockException)1 OModificationOperationProhibitedException (com.orientechnologies.common.concur.lock.OModificationOperationProhibitedException)1 ODatabaseDocumentTx (com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx)1 OConcurrentCreateException (com.orientechnologies.orient.core.exception.OConcurrentCreateException)1 OClass (com.orientechnologies.orient.core.metadata.schema.OClass)1 OSecurityUser (com.orientechnologies.orient.core.metadata.security.OSecurityUser)1 ORecord (com.orientechnologies.orient.core.record.ORecord)1 ODocument (com.orientechnologies.orient.core.record.impl.ODocument)1 OCommandSQL (com.orientechnologies.orient.core.sql.OCommandSQL)1 ODistributedLockTask (com.orientechnologies.orient.server.distributed.impl.task.ODistributedLockTask)1 OHeartbeatTask (com.orientechnologies.orient.server.distributed.impl.task.OHeartbeatTask)1 ORequestDatabaseConfigurationTask (com.orientechnologies.orient.server.distributed.impl.task.ORequestDatabaseConfigurationTask)1 OWaitForTask (com.orientechnologies.orient.server.distributed.impl.task.OWaitForTask)1 ODistributedRecordLockedException (com.orientechnologies.orient.server.distributed.task.ODistributedRecordLockedException)1 OrientBaseGraph (com.tinkerpop.blueprints.impls.orient.OrientBaseGraph)1 OrientVertex (com.tinkerpop.blueprints.impls.orient.OrientVertex)1 HashSet (java.util.HashSet)1 CountDownLatch (java.util.concurrent.CountDownLatch)1