Search in sources :

Example 1 with VoldemortRebalancingException

use of voldemort.server.rebalance.VoldemortRebalancingException in project voldemort by voldemort.

the class StealerBasedRebalanceAsyncOperation method operate.

/**
 * Rebalances every store listed in {@code stealInfo}, one executor task per
 * store, and then clears the rebalancing state if all stores completed.
 *
 * Each task records its store in a "currently rebalancing" collection,
 * calls {@code rebalanceStore}, and on success removes the store from
 * {@code stealInfo}. Exceptions raised inside a task are logged and
 * collected rather than propagated. After {@code waitForShutdown()} returns
 * (presumably once all submitted tasks have finished - confirm against its
 * implementation), any store still present in {@code stealInfo} is treated
 * as a failure.
 *
 * The donor's rebalancing permit is released and the temporary admin client
 * is closed in all cases, including when creating the admin client fails.
 *
 * @throws VoldemortRebalancingException if at least one store was not
 *         rebalanced; carries the exceptions collected from the tasks
 * @throws Exception any other failure raised while setting up or waiting
 */
@Override
public void operate() throws Exception {
    // Written to by the executor tasks, so it must be a thread-safe
    // collection (a plain ArrayList can lose or corrupt entries here).
    final ConcurrentLinkedQueue<Exception> failures = new ConcurrentLinkedQueue<Exception>();
    final ConcurrentLinkedQueue<String> storesRebalancing = new ConcurrentLinkedQueue<String>();
    final AtomicInteger completedStoresCount = new AtomicInteger(0);
    final int totalStoresCount = stealInfo.getPartitionStores().size();
    try {
        // Created inside the try so that a failure here still reaches the
        // finally block and releases the permit.
        adminClient = AdminClient.createTempAdminClient(voldemortConfig, metadataStore.getCluster(), voldemortConfig.getMaxParallelStoresRebalancing());
        // Iterate over a snapshot: tasks remove stores from stealInfo as
        // they complete.
        for (final String storeName : ImmutableList.copyOf(stealInfo.getPartitionStores())) {
            executors.submit(new Runnable() {

                @Override
                public void run() {
                    try {
                        boolean isReadOnlyStore = metadataStore.getStoreDef(storeName).getType().compareTo(ReadOnlyStorageConfiguration.TYPE_NAME) == 0;
                        // Add the store to the rebalancing list
                        storesRebalancing.add(storeName);
                        updateStatus(getHeader(stealInfo) + "Completed working on " + completedStoresCount.get() + " out of " + totalStoresCount + " stores. Still rebalancing " + storesRebalancing);
                        // Start the rebalance..
                        rebalanceStore(storeName, adminClient, stealInfo, isReadOnlyStore);
                        // We finished the store, delete it
                        stealInfo.removeStore(storeName);
                        storesRebalancing.remove(storeName);
                        // Increment the store count
                        completedStoresCount.getAndIncrement();
                        updateStatus(getHeader(stealInfo) + "Completed working on " + completedStoresCount.get() + " out of " + totalStoresCount + " stores. Still rebalancing " + storesRebalancing);
                    } catch (Exception e) {
                        logger.error(getHeader(stealInfo) + "Error while rebalancing for store " + storeName + " - " + e.getMessage(), e);
                        failures.add(e);
                    }
                }
            });
        }
        waitForShutdown();
        // If empty, clean state
        List<String> unbalancedStores = Lists.newArrayList(stealInfo.getPartitionStores());
        if (unbalancedStores.isEmpty()) {
            logger.info(getHeader(stealInfo) + "Rebalance of " + stealInfo + " completed successfully for all " + totalStoresCount + " stores");
            updateStatus(getHeader(stealInfo) + "Rebalance of " + partitionStoreCount + " partition-stores completed successfully for all " + totalStoresCount + " stores");
            metadataStore.deleteRebalancingState(stealInfo);
        } else {
            // Hand the exception a stable List copy of the collected failures.
            throw new VoldemortRebalancingException(getHeader(stealInfo) + "Failed to rebalance task " + stealInfo + ". Could only complete " + completedStoresCount.get() + " out of " + totalStoresCount + " stores", new ArrayList<Exception>(failures));
        }
    } finally {
        // free the permit in all cases.
        logger.info(getHeader(stealInfo) + "Releasing permit for donor node " + stealInfo.getDonorId());
        rebalancer.releaseRebalancingPermit(stealInfo.getDonorId());
        // The client is null if its creation failed above.
        if (adminClient != null) {
            adminClient.close();
            adminClient = null;
        }
    }
}
Also used : VoldemortRebalancingException(voldemort.server.rebalance.VoldemortRebalancingException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ArrayList(java.util.ArrayList) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) VoldemortRebalancingException(voldemort.server.rebalance.VoldemortRebalancingException)

Example 2 with VoldemortRebalancingException

use of voldemort.server.rebalance.VoldemortRebalancingException in project voldemort by voldemort.

the class AdminRebalanceTest method testRebalanceStateChange.

/**
 * Exercises {@code rebalanceStateChange} on a four-node cluster in three
 * scenarios: (1) all nodes up, where each stealer node should hold exactly
 * its plan and every other node an empty rebalancer state; (2) node 3
 * pre-loaded with a conflicting plan, which should make the call fail; and
 * (3) node 3 stopped, which should also make the call fail. After each
 * failure the remaining nodes are expected to have an empty rebalancer
 * state.
 */
@Test(timeout = 60000)
public void testRebalanceStateChange() throws IOException {
    try {
        startFourNodeRW();
        // Test 1) Normal case where-in all are up
        adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, false, false, true, true, true);
        List<Integer> nodesChecked = Lists.newArrayList();
        for (RebalanceTaskInfo plan : plans) {
            nodesChecked.add(plan.getStealerId());
            assertEquals(new RebalancerState(Lists.newArrayList(plan)), servers[plan.getStealerId()].getMetadataStore().getRebalancerState());
        }
        List<Integer> allNodes = Lists.newArrayList(Utils.nodeListToNodeIdList(Lists.newArrayList(currentCluster.getNodes())));
        allNodes.removeAll(nodesChecked);
        // Check all other nodes
        for (int nodeId : allNodes) {
            assertEquals(new RebalancerState(new ArrayList<RebalanceTaskInfo>()), servers[nodeId].getMetadataStore().getRebalancerState());
        }
        // Clean-up everything
        cleanUpAllState();
        // Test 2) Add a plan before hand on one of them which should
        // trigger a rollback
        servers[3].getMetadataStore().getRebalancerState().update(new RebalanceTaskInfo(3, 0, new HashMap<String, List<Integer>>(), currentCluster));
        try {
            adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, false, false, true, true, true);
            fail("Should have thrown an exception since we added state before hand");
        } catch (VoldemortRebalancingException expected) {
            // Expected: the pre-existing state on node 3 must abort the change.
        }
        // Every node except node 3 should be back to an empty state
        for (VoldemortServer server : servers) {
            if (server.getMetadataStore().getNodeId() != 3) {
                assertEquals(new RebalancerState(new ArrayList<RebalanceTaskInfo>()), server.getMetadataStore().getRebalancerState());
            }
        }
        // Clean-up everything
        cleanUpAllState();
        // Test 3) Shut one node down
        ServerTestUtils.stopVoldemortServer(servers[3]);
        servers[3] = null;
        try {
            adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, false, false, true, true, true);
            fail("Should have thrown an exception since node 3 is shut down");
        } catch (VoldemortRebalancingException expected) {
            // Expected: the state change cannot reach the stopped node.
        }
        // Every node except the stopped node 3 should have an empty state
        for (VoldemortServer server : servers) {
            if (server != null) {
                assertEquals(new RebalancerState(new ArrayList<RebalanceTaskInfo>()), server.getMetadataStore().getRebalancerState());
            }
        }
    } finally {
        shutDown();
    }
}
Also used : VoldemortRebalancingException(voldemort.server.rebalance.VoldemortRebalancingException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RebalancerState(voldemort.server.rebalance.RebalancerState) VoldemortServer(voldemort.server.VoldemortServer) Test(org.junit.Test)

Example 3 with VoldemortRebalancingException

use of voldemort.server.rebalance.VoldemortRebalancingException in project voldemort by voldemort.

the class AdminRebalanceTest method testRebalanceNodeRORW.

/**
 * Runs a rebalance on a four-node cluster with read-only and read-write
 * stores, then exercises {@code rebalanceStateChange} in three scenarios:
 * (1) node 3 pre-loaded with a conflicting plan, (2) an extra store
 * definition put on node 2, and (3) a clean run. The first two are expected
 * to throw {@link VoldemortRebalancingException} and leave the checked
 * nodes on {@code currentCluster} in NORMAL_SERVER state; the third is
 * expected to move every node to {@code finalCluster}.
 */
@Test(timeout = 60000)
public void testRebalanceNodeRORW() throws IOException, InterruptedException {
    try {
        startFourNodeRORW();
        int numChunks = 5;
        for (StoreDefinition storeDef : Lists.newArrayList(storeDef1, storeDef2)) {
            buildROStore(storeDef, numChunks);
        }
        // Set into rebalancing state
        for (RebalanceTaskInfo partitionPlan : plans) {
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.SERVER_STATE_KEY, MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER);
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.REBALANCING_STEAL_INFO, new RebalancerState(Lists.newArrayList(RebalanceTaskInfo.create(partitionPlan.toJsonString()))));
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML, partitionPlan.getInitialCluster());
        }
        // Actually run it
        try {
            for (RebalanceTaskInfo currentPlan : plans) {
                int asyncId = adminClient.rebalanceOps.rebalanceNode(currentPlan);
                // Compare by value; assertNotSame would compare boxed
                // Integer identity and only works via the box cache.
                assertFalse("Expected a valid rebalanceAsyncId", asyncId == -1);
                getAdminClient().rpcOps.waitForCompletion(currentPlan.getStealerId(), asyncId, 300, TimeUnit.SECONDS);
                // Test that plan has been removed from the list
                assertFalse(getServer(currentPlan.getStealerId()).getMetadataStore().getRebalancerState().getAll().contains(currentPlan));
            }
        } catch (Exception e) {
            e.printStackTrace();
            fail("Should not throw any exceptions: " + e);
        }
        // Test 1) Change one of the rebalance partitions info to force a
        // failure
        servers[3].getMetadataStore().getRebalancerState().update(new RebalanceTaskInfo(3, 0, new HashMap<String, List<Integer>>(), currentCluster));
        try {
            adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, true, true, true, true, true);
            fail("Should have thrown an exception since we added state before hand");
        } catch (VoldemortRebalancingException expected) {
            // Expected: the pre-existing state on node 3 must abort the change.
        }
        // Every node except node 3 should be back to an empty, normal state
        for (VoldemortServer server : servers) {
            if (server.getMetadataStore().getNodeId() != 3) {
                assertEquals(new RebalancerState(new ArrayList<RebalanceTaskInfo>()), server.getMetadataStore().getRebalancerState());
                assertEquals(MetadataStore.VoldemortState.NORMAL_SERVER, server.getMetadataStore().getServerStateUnlocked());
            }
            assertEquals(currentCluster, server.getMetadataStore().getCluster());
        }
        checkRO(currentCluster);
        // Clean-up everything
        cleanUpAllState();
        // Test 2 ) Add another store to trigger a failure
        servers[2].getMetadataStore().put(MetadataStore.STORES_KEY, Lists.newArrayList(storeDef1, storeDef2, storeDef3, storeDef4, new StoreDefinitionBuilder().setName("test5").setType(ReadOnlyStorageConfiguration.TYPE_NAME).setKeySerializer(new SerializerDefinition("string")).setValueSerializer(new SerializerDefinition("string")).setRoutingPolicy(RoutingTier.CLIENT).setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY).setReplicationFactor(2).setPreferredReads(1).setRequiredReads(1).setPreferredWrites(1).setRequiredWrites(1).build()));
        try {
            adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, true, true, true, true, true);
            fail("Should have thrown an exception since an extra store was added on node 2");
        } catch (VoldemortRebalancingException expected) {
            // Expected: the extra store definition must abort the change.
        }
        // NOTE(review): presumably gives the servers time to finish rolling
        // back before the assertions below - confirm.
        Thread.sleep(1000);
        for (VoldemortServer server : servers) {
            assertEquals(new RebalancerState(new ArrayList<RebalanceTaskInfo>()), server.getMetadataStore().getRebalancerState());
            assertEquals(MetadataStore.VoldemortState.NORMAL_SERVER, server.getMetadataStore().getServerStateUnlocked());
            assertEquals(currentCluster, server.getMetadataStore().getCluster());
        }
        checkRO(currentCluster);
        // Clean-up everything
        cleanUpAllState();
        // Put back server 2 back to normal state
        servers[2].getMetadataStore().put(MetadataStore.STORES_KEY, Lists.newArrayList(storeDef1, storeDef2, storeDef3, storeDef4));
        // Test 3) Everything should work
        adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, true, true, true, true, true);
        List<Integer> nodesChecked = Lists.newArrayList();
        for (RebalanceTaskInfo plan : plans) {
            nodesChecked.add(plan.getStealerId());
            assertEquals(new RebalancerState(Lists.newArrayList(plan)), servers[plan.getStealerId()].getMetadataStore().getRebalancerState());
            assertEquals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER, servers[plan.getStealerId()].getMetadataStore().getServerStateUnlocked());
            assertEquals(finalCluster, servers[plan.getStealerId()].getMetadataStore().getCluster());
        }
        List<Integer> allNodes = Lists.newArrayList(Utils.nodeListToNodeIdList(Lists.newArrayList(currentCluster.getNodes())));
        allNodes.removeAll(nodesChecked);
        // Check all other nodes
        for (int nodeId : allNodes) {
            assertEquals(new RebalancerState(new ArrayList<RebalanceTaskInfo>()), servers[nodeId].getMetadataStore().getRebalancerState());
            assertEquals(MetadataStore.VoldemortState.NORMAL_SERVER, servers[nodeId].getMetadataStore().getServerStateUnlocked());
            assertEquals(finalCluster, servers[nodeId].getMetadataStore().getCluster());
        }
        checkRO(finalCluster);
    } finally {
        shutDown();
    }
}
Also used : StoreDefinitionBuilder(voldemort.store.StoreDefinitionBuilder) VoldemortRebalancingException(voldemort.server.rebalance.VoldemortRebalancingException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VoldemortServer(voldemort.server.VoldemortServer) AlreadyRebalancingException(voldemort.server.rebalance.AlreadyRebalancingException) VoldemortRebalancingException(voldemort.server.rebalance.VoldemortRebalancingException) VoldemortException(voldemort.VoldemortException) IOException(java.io.IOException) StoreDefinition(voldemort.store.StoreDefinition) RebalancerState(voldemort.server.rebalance.RebalancerState) SerializerDefinition(voldemort.serialization.SerializerDefinition) Test(org.junit.Test)

Example 4 with VoldemortRebalancingException

use of voldemort.server.rebalance.VoldemortRebalancingException in project voldemort by voldemort.

the class RebalanceUtils method validateClusterPartitionState.

/**
 * Confirm that all nodes shared between clusters host exact same partition
 * IDs and that nodes only in the super set cluster have no partition IDs.
 *
 * Neither cluster is modified: nodes that exist only in the superset are
 * found with a membership check rather than by removing IDs from the set
 * returned by {@code getNodeIds()}.
 *
 * @param subsetCluster cluster whose node IDs must all exist in
 *        {@code supersetCluster}
 * @param supersetCluster cluster that may contain additional nodes, which
 *        must not host any partitions
 * @throws VoldemortException if the superset cluster is missing a node ID
 *         of the subset cluster
 * @throws VoldemortRebalancingException if a shared node hosts different
 *         partition IDs in the two clusters, or a superset-only node
 *         already hosts partitions
 */
public static void validateClusterPartitionState(final Cluster subsetCluster, final Cluster supersetCluster) {
    final Set<Integer> subsetNodeIds = subsetCluster.getNodeIds();
    final Set<Integer> supersetNodeIds = supersetCluster.getNodeIds();
    if (!supersetNodeIds.containsAll(subsetNodeIds)) {
        throw new VoldemortException("Superset cluster does not contain all nodes from subset cluster[ subset cluster node ids (" + subsetNodeIds + ") are not a subset of superset cluster node ids (" + supersetNodeIds + ") ]");
    }
    // Shared nodes must host identical partitions in both clusters.
    for (int nodeId : subsetNodeIds) {
        Node supersetNode = supersetCluster.getNodeById(nodeId);
        Node subsetNode = subsetCluster.getNodeById(nodeId);
        if (!supersetNode.getPartitionIds().equals(subsetNode.getPartitionIds())) {
            throw new VoldemortRebalancingException("Partition IDs do not match between clusters for nodes with id " + nodeId + " : subset cluster has " + subsetNode.getPartitionIds() + " and superset cluster has " + supersetNode.getPartitionIds());
        }
    }
    // Nodes present only in the superset must be empty. Skip shared nodes
    // instead of calling removeAll on the returned set: whether that set is
    // a copy, a live view, or immutable is not visible from here.
    for (int nodeId : supersetNodeIds) {
        if (subsetNodeIds.contains(nodeId)) {
            continue;
        }
        Node supersetNode = supersetCluster.getNodeById(nodeId);
        if (!supersetNode.getPartitionIds().isEmpty()) {
            throw new VoldemortRebalancingException("New node " + nodeId + " in superset cluster already has partitions: " + supersetNode.getPartitionIds());
        }
    }
}
Also used : VoldemortRebalancingException(voldemort.server.rebalance.VoldemortRebalancingException) Node(voldemort.cluster.Node) VoldemortException(voldemort.VoldemortException)

Example 5 with VoldemortRebalancingException

use of voldemort.server.rebalance.VoldemortRebalancingException in project voldemort by voldemort.

the class RebalanceScheduler method scheduleNextTask.

/**
 * Schedule at most one task.
 *
 * The scheduled task *must* invoke 'doneTask()' upon
 * completion/termination.
 *
 * Stealer IDs are visited in random order. The first task whose stealer and
 * donor (taken from the task's first steal info) are both absent from
 * {@code nodeIdsWithWork} is selected, recorded as executing, removed from
 * {@code tasksByStealer} and, if requested, handed to the executor service.
 *
 * @param executeService flag to control execution of the service, some tests pass
 *        in value 'false'
 * @return The task scheduled or null if not possible to schedule a task at
 *         this time.
 * @throws VoldemortRebalancingException if the executor service rejects the
 *         selected task
 */
protected synchronized StealerBasedRebalanceTask scheduleNextTask(boolean executeService) {
    // Make sure there is work left to do.
    if (doneSignal.getCount() == 0) {
        logger.info("All tasks completion signaled... returning");
        return null;
    }
    // Limit number of tasks outstanding.
    if (this.numTasksExecuting >= maxParallelRebalancing) {
        logger.info("Already executing [" + this.numTasksExecuting + "] tasks; maximum allowed in parallel is " + maxParallelRebalancing);
        return null;
    }
    // Shuffle list of stealer IDs each time a new task to schedule needs to
    // be found. Randomizing the order should avoid prioritizing one
    // specific stealer's work ahead of all others.
    List<Integer> stealerIds = new ArrayList<Integer>(tasksByStealer.keySet());
    Collections.shuffle(stealerIds);
    for (int stealerId : stealerIds) {
        if (nodeIdsWithWork.contains(stealerId)) {
            logger.info("Stealer " + stealerId + " is already working... continuing");
            continue;
        }
        for (StealerBasedRebalanceTask sbTask : tasksByStealer.get(stealerId)) {
            int donorId = sbTask.getStealInfos().get(0).getDonorId();
            if (nodeIdsWithWork.contains(donorId)) {
                logger.info("Stealer " + stealerId + " Donor " + donorId + " is already working... continuing");
                continue;
            }
            // Book keeping
            addNodesToWorkerList(Arrays.asList(stealerId, donorId));
            numTasksExecuting++;
            // Remove this task from list thus destroying list being
            // iterated over. This is safe because returning directly out of
            // this branch.
            tasksByStealer.get(stealerId).remove(sbTask);
            try {
                if (executeService) {
                    logger.info("Stealer " + stealerId + " Donor " + donorId + " going to schedule work");
                    service.execute(sbTask);
                }
            } catch (RejectedExecutionException ree) {
                // Build the message once so the log and the exception agree.
                String rejectedMessage = "Stealer " + stealerId + " Rebalancing task rejected by executor service.";
                logger.error(rejectedMessage, ree);
                // NOTE(review): the cause is logged above but not attached
                // to the thrown exception; attach it if
                // VoldemortRebalancingException offers a (String, Throwable)
                // constructor - not visible from here. The book keeping done
                // above is also left in place on this path.
                throw new VoldemortRebalancingException(rejectedMessage);
            }
            return sbTask;
        }
    }
    printRemainingTasks(stealerIds);
    return null;
}
Also used : VoldemortRebalancingException(voldemort.server.rebalance.VoldemortRebalancingException) StealerBasedRebalanceTask(voldemort.client.rebalance.task.StealerBasedRebalanceTask) ArrayList(java.util.ArrayList) RejectedExecutionException(java.util.concurrent.RejectedExecutionException)

Aggregations

VoldemortRebalancingException (voldemort.server.rebalance.VoldemortRebalancingException)8 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)4 Test (org.junit.Test)3 VoldemortException (voldemort.VoldemortException)3 VoldemortServer (voldemort.server.VoldemortServer)3 RebalancerState (voldemort.server.rebalance.RebalancerState)3 StealerBasedRebalanceTask (voldemort.client.rebalance.task.StealerBasedRebalanceTask)2 Node (voldemort.cluster.Node)2 IOException (java.io.IOException)1 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)1 ExecutorService (java.util.concurrent.ExecutorService)1 RejectedExecutionException (java.util.concurrent.RejectedExecutionException)1 Semaphore (java.util.concurrent.Semaphore)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 RebalanceTask (voldemort.client.rebalance.task.RebalanceTask)1 SerializerDefinition (voldemort.serialization.SerializerDefinition)1 AlreadyRebalancingException (voldemort.server.rebalance.AlreadyRebalancingException)1 StoreDefinition (voldemort.store.StoreDefinition)1 StoreDefinitionBuilder (voldemort.store.StoreDefinitionBuilder)1