Search in sources :

Example 61 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class ImportTextDumpToBDB method main.

public static void main(String[] argv) throws Exception {
    OptionParser parser = getParser();
    OptionSet options = parser.parse(argv);
    String inputPath = (String) options.valueOf("input");
    String storeBdbFolderPath = (String) options.valueOf("bdb");
    String clusterXmlPath = (String) options.valueOf("cluster-xml");
    String storesXmlPath = (String) options.valueOf("stores-xml");
    Integer nodeId = (Integer) options.valueOf("node-id");
    File input = new File(inputPath);
    List<File> dataFiles = new ArrayList<File>();
    if (input.isDirectory()) {
        File[] files = input.listFiles();
        if (files != null)
            Collections.addAll(dataFiles, files);
    } else if (input.isFile()) {
    } else {
        System.err.println(inputPath + "is not file or directory");
    File storeBdbFolder = new File(storeBdbFolderPath);
    final String storeName = storeBdbFolder.getName();
    Cluster cluster = new ClusterMapper().readCluster(new File(clusterXmlPath));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper().readStoreList(new File(storesXmlPath));
    StoreDefinition storeDef = null;
    for (StoreDefinition sd : storeDefs) {
        if (sd.getName() != null && sd.getName().equals(storeName)) {
            storeDef = sd;
    if (storeDef == null) {
        throw new VoldemortException("StoreNotfound: " + storeName);
    RoutingStrategy routingStrategy = new RoutingStrategyFactory().updateRoutingStrategy(storeDef, cluster);
    Properties properties = new Properties();
    properties.put("", "0");
    properties.put("voldemort.home", storeBdbFolder.getParent());
    VoldemortConfig voldemortConfig = new VoldemortConfig(properties);
    BdbStorageConfiguration bdbConfiguration = new BdbStorageConfiguration(voldemortConfig);
    class MockStoreDefinition extends StoreDefinition {

        public MockStoreDefinition() {
            super(storeName, null, null, null, null, null, null, null, 0, null, 0, null, 0, null, null, null, null, null, null, null, null, null, null, null, null, 0);

        public boolean hasMemoryFootprint() {
            return false;
    StoreDefinition mockStoreDef = new MockStoreDefinition();
    StorageEngine<ByteArray, byte[], byte[]> engine = bdbConfiguration.getStore(mockStoreDef, routingStrategy);
    long reportIntervalMs = 10000L;
    long lastCount = 0;
    long lastInserted = 0;
    Reporter<Boolean> rp = new Reporter<Boolean>(reportIntervalMs);
    long count = 0;
    long inserted = 0;
    for (File f : dataFiles) {
        try {
            BufferedReader bufferedReader = new BufferedReader(new FileReader(f), READER_BUFFER_SIZE);
            while (true) {
                String line = bufferedReader.readLine();
                if (line == null) {
                Pair<ByteArray, Versioned<byte[]>> entry;
                try {
                    entry = lineToEntry(line);
                } catch (Exception e) {
                    System.err.println("Skipping line: " + line);
                ByteArray key = entry.getFirst();
                List<Node> nodeList = routingStrategy.routeRequest(key.get());
                for (Node node : nodeList) {
                    if (nodeId == node.getId()) {
                        try {
                            engine.put(key, entry.getSecond(), null);
                        } catch (ObsoleteVersionException e) {
                final Long countObject = count;
                final Long insertedObject = inserted;
                Boolean reported = rp.tryReport(new Callable<Boolean>() {

                    public Boolean call() throws Exception {
                        System.out.print(String.format("Imported %15d entries; Inserted %15d entries", countObject, insertedObject));
                        return true;
                if (reported != null) {
                    long importSpeed = (count - lastCount) / (reportIntervalMs / 1000);
                    long insertSpeed = (inserted - lastInserted) / (reportIntervalMs / 1000);
                    System.out.println(String.format("; ImportSpeed: %8d/s; InsertSpeed: %8d/s ", importSpeed, insertSpeed));
                    lastCount = count;
                    lastInserted = inserted;
        } catch (IOException e) {
        } finally {
    System.out.println(String.format("Finished importing %d entries (%d inserted, rest discarded)", count, inserted));
Also used : Versioned(voldemort.versioning.Versioned) RoutingStrategyFactory(voldemort.routing.RoutingStrategyFactory) Node(voldemort.cluster.Node) OptionParser(joptsimple.OptionParser) VoldemortException(voldemort.VoldemortException) VoldemortConfig(voldemort.server.VoldemortConfig) StoreDefinition(voldemort.store.StoreDefinition) RoutingStrategy(voldemort.routing.RoutingStrategy) ByteArray(voldemort.utils.ByteArray) BdbStorageConfiguration(voldemort.store.bdb.BdbStorageConfiguration) StoreDefinitionsMapper(voldemort.xml.StoreDefinitionsMapper) Cluster(voldemort.cluster.Cluster) ClusterMapper(voldemort.xml.ClusterMapper) VoldemortException(voldemort.VoldemortException) ObsoleteVersionException(voldemort.versioning.ObsoleteVersionException) DecoderException(org.apache.commons.codec.DecoderException) ObsoleteVersionException(voldemort.versioning.ObsoleteVersionException) OptionSet(joptsimple.OptionSet)

Example 62 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class PartitionAnalysisCLI method main.

public static void main(String[] args) throws Exception {
    OptionSet options = getValidOptions(args);
    String clusterXML = (String) options.valueOf("cluster");
    String storesXML = (String) options.valueOf("stores");
    Cluster currentCluster = new ClusterMapper().readCluster(new File(clusterXML));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper().readStoreList(new File(storesXML));
    PartitionBalance partitionBalance = new PartitionBalance(currentCluster, storeDefs);
Also used : StoreDefinition(voldemort.store.StoreDefinition) StoreDefinitionsMapper(voldemort.xml.StoreDefinitionsMapper) Cluster(voldemort.cluster.Cluster) ClusterMapper(voldemort.xml.ClusterMapper) OptionSet(joptsimple.OptionSet) File(java.io.File)

Example 63 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class Repartitioner method repeatedlyBalanceContiguousPartitionsPerZone.

    /**
     * Loops over cluster and repeatedly tries to break up contiguous runs of
     * partitions. After each phase of breaking up contiguous partitions, random
     * partitions are selected to move between zones to balance the number of
     * partitions in each zone. The second phase may re-introduce contiguous
     * partition runs in another zone. Therefore, this overall process is
     * repeated multiple times.
     *
     * @param nextCandidateCluster
     * @param maxContiguousPartitionsPerZone See RebalanceCLI.
     * @return updated cluster
     */
public static Cluster repeatedlyBalanceContiguousPartitionsPerZone(final Cluster nextCandidateCluster, final int maxContiguousPartitionsPerZone) {
    System.out.println("Looping to evenly balance partitions across zones while limiting contiguous partitions");
    // This loop is hard to make definitive. I.e., there are corner cases
    // for small clusters and/or clusters with few partitions for which it
    // may be impossible to achieve tight limits on contiguous run lenghts.
    // Therefore, a constant number of loops are run. Note that once the
    // goal is reached, the loop becomes a no-op.
    int repeatContigBalance = 10;
    Cluster returnCluster = nextCandidateCluster;
    for (int i = 0; i < repeatContigBalance; i++) {
        returnCluster = balanceContiguousPartitionsPerZone(returnCluster, maxContiguousPartitionsPerZone);
        returnCluster = balancePrimaryPartitions(returnCluster, false);
        System.out.println("Completed round of balancing contiguous partitions: round " + (i + 1) + " of " + repeatContigBalance);
    return returnCluster;
Also used : Cluster(voldemort.cluster.Cluster)

Example 64 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class Repartitioner method repartition.

    /**
     * Runs a number of distinct algorithms over the specified clusters/store
     * defs to better balance partition IDs over nodes such that all nodes have
     * similar iops and capacity usage.
     * The algorithms (in order):
     * <ul>
     * <li>Get rid of contiguous runs of partition IDs within a zone. Such runs
     * make balancing load overall more difficult.
     * <li>Balance partition IDs among zones and/or among nodes within zones.
     * <li>Randomly swap partition IDs among nodes to improve overall balance.
     * (Any swap that improves balance is accepted.)
     * <li>Greedily swap partition IDs among nodes to improve overall balance.
     * (Some number of swaps are considered and the best of which is accepted.)
     * </ul>
     * This method is used for three key use cases:
     * <ul>
     * <li>Shuffling : Distribute partition IDs better for an existing cluster.
     * <li>Cluster expansion : Distribute partition IDs to take advantage of new
     * nodes (added to some of the zones).
     * <li>Zone expansion : Distribute partition IDs into a new zone.
     * </ul>
     *
     * @param currentCluster current cluster
     * @param currentStoreDefs current store defs
     * @param interimCluster interim cluster; needed for cluster or zone
     *        expansion, otherwise pass in same as currentCluster.
     * @param finalStoreDefs final store defs; needed for zone expansion,
     *        otherwise pass in same as currentStoreDefs.
     * @param outputDir Directory in which to dump cluster xml and analysis
     *        files.
     * @param attempts Number of distinct repartitionings to attempt, the best
     *        of which is returned.
     * @param disableNodeBalancing Disables the core algorithm that balances
     *        primaries among nodes within each zone.
     * @param disableZoneBalancing For the core algorithm that balances
     *        primaries among nodes in each zone, disable balancing primaries
     *        among zones.
     * @param enableRandomSwaps Enables random swap optimization.
     * @param randomSwapAttempts
     * @param randomSwapSuccesses
     * @param randomSwapZoneIds
     * @param enableGreedySwaps Enables greedy swap optimization.
     * @param greedySwapAttempts
     * @param greedySwapMaxPartitionsPerNode
     * @param greedySwapMaxPartitionsPerZone
     * @param greedySwapZoneIds
     * @param maxContiguousPartitionsPerZone
     * @return "final cluster" that has had all specified balancing algorithms
     *         run against it. The number of zones and number of nodes will
     *         match that of the specified "interim cluster".
     */
public static Cluster repartition(final Cluster currentCluster, final List<StoreDefinition> currentStoreDefs, final Cluster interimCluster, final List<StoreDefinition> finalStoreDefs, final String outputDir, final int attempts, final boolean disableNodeBalancing, final boolean disableZoneBalancing, final boolean enableRandomSwaps, final int randomSwapAttempts, final int randomSwapSuccesses, final List<Integer> randomSwapZoneIds, final boolean enableGreedySwaps, final int greedySwapAttempts, final int greedySwapMaxPartitionsPerNode, final int greedySwapMaxPartitionsPerZone, final List<Integer> greedySwapZoneIds, final int maxContiguousPartitionsPerZone) {
    PartitionBalance partitionBalance = new PartitionBalance(currentCluster, currentStoreDefs);
    RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.currentClusterFileName, partitionBalance);
    Cluster minCluster = interimCluster;
    double minUtility = Double.MAX_VALUE;
    for (int attempt = 0; attempt < attempts; attempt++) {
        Cluster nextCandidateCluster = interimCluster;
        if (maxContiguousPartitionsPerZone > 0) {
            nextCandidateCluster = repeatedlyBalanceContiguousPartitionsPerZone(nextCandidateCluster, maxContiguousPartitionsPerZone);
        if (!disableNodeBalancing) {
            nextCandidateCluster = balancePrimaryPartitions(nextCandidateCluster, !disableZoneBalancing);
        if (enableRandomSwaps) {
            nextCandidateCluster = randomShufflePartitions(nextCandidateCluster, randomSwapAttempts, randomSwapSuccesses, randomSwapZoneIds, finalStoreDefs);
        if (enableGreedySwaps) {
            nextCandidateCluster = greedyShufflePartitions(nextCandidateCluster, greedySwapAttempts, greedySwapMaxPartitionsPerNode, greedySwapMaxPartitionsPerZone, greedySwapZoneIds, finalStoreDefs);
        RebalanceUtils.validateCurrentFinalCluster(currentCluster, nextCandidateCluster);
        partitionBalance = new PartitionBalance(nextCandidateCluster, finalStoreDefs);
        double currentUtility = partitionBalance.getUtility();
        System.out.println("Optimization number " + attempt + ": " + currentUtility + " max/min ratio");
        System.out.println(PartitionBalanceUtils.analyzeInvalidMetadataRate(interimCluster, finalStoreDefs, nextCandidateCluster, finalStoreDefs));
        if (currentUtility <= minUtility) {
            minUtility = currentUtility;
            minCluster = nextCandidateCluster;
            RebalanceUtils.dumpClusterToFile(outputDir, RebalanceUtils.finalClusterFileName + attempt, minCluster);
            RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.finalClusterFileName + attempt, partitionBalance);
    System.out.println("Final distribution");
    partitionBalance = new PartitionBalance(minCluster, finalStoreDefs);
    RebalanceUtils.dumpClusterToFile(outputDir, RebalanceUtils.finalClusterFileName, minCluster);
    RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.finalClusterFileName, partitionBalance);
    return minCluster;
Also used : Cluster(voldemort.cluster.Cluster)

Example 65 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class Repartitioner method balancePrimaryPartitions.

    /**
     * This method balances primary partitions among nodes within a zone, and
     * optionally primary partitions among zones. The balancing is done at the
     * level of partitionIds. Such partition Id movement may, or may not, result
     * in data movement during a rebalancing. See RebalancePlan for the object
     * responsible for determining which partition-stores move where for a
     * specific repartitioning.
     *
     * @param nextCandidateCluster
     * @param balanceZones indicates whether or not number of primary partitions
     *        per zone should be balanced.
     * @return updated cluster
     */
public static Cluster balancePrimaryPartitions(final Cluster nextCandidateCluster, boolean balanceZones) {
    System.out.println("Balance number of partitions across all nodes and zones.");
    Map<Integer, Integer> targetPartitionsPerZone;
    if (balanceZones) {
        targetPartitionsPerZone = Utils.distributeEvenlyIntoMap(nextCandidateCluster.getZoneIds(), nextCandidateCluster.getNumberOfPartitions());
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            System.out.println(zoneId + " : " + nextCandidateCluster.getNumberOfPartitionsInZone(zoneId) + " -> " + targetPartitionsPerZone.get(zoneId));
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            System.out.println(zoneId + " : " + nextCandidateCluster.getNumberOfNodesInZone(zoneId));
    } else {
        // Keep number of partitions per zone the same.
        targetPartitionsPerZone = new HashMap<Integer, Integer>();
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            targetPartitionsPerZone.put(zoneId, nextCandidateCluster.getNumberOfPartitionsInZone(zoneId));
    HashMap<Integer, List<Integer>> numPartitionsPerNodeByZone = getBalancedNumberOfPrimaryPartitionsPerNode(nextCandidateCluster, targetPartitionsPerZone);
    Pair<HashMap<Node, Integer>, HashMap<Node, Integer>> donorsAndStealers = getDonorsAndStealersForBalance(nextCandidateCluster, numPartitionsPerNodeByZone);
    HashMap<Node, Integer> donorNodes = donorsAndStealers.getFirst();
    List<Node> donorNodeKeys = new ArrayList<Node>(donorNodes.keySet());
    HashMap<Node, Integer> stealerNodes = donorsAndStealers.getSecond();
    List<Node> stealerNodeKeys = new ArrayList<Node>(stealerNodes.keySet());
         * There is no "intelligence" here about which partition IDs are moved
         * where. The RebalancePlan object owns determining how to move data
         * around to meet a specific repartitioning. That said, a little bit of
         * intelligence here may go a long way. For example, for zone expansion
         * data could be minimized by:
         * (1) Selecting a minimal # of partition IDs for the new zoneto
         * minimize how much the ring in existing zones is perturbed;
         * (2) Selecting partitions for the new zone from contiguous runs of
         * partition IDs in other zones that are not currently n-ary partitions
         * for other primary partitions;
         * (3) Some combination of (1) and (2)...
    // Go over every stealerNode and steal partition Ids from donor nodes
    Cluster returnCluster = Cluster.cloneCluster(nextCandidateCluster);
    Collections.shuffle(stealerNodeKeys, new Random(System.currentTimeMillis()));
    for (Node stealerNode : stealerNodeKeys) {
        int partitionsToSteal = stealerNodes.get(stealerNode) - stealerNode.getNumberOfPartitions();
        System.out.println("Node (" + stealerNode.getId() + ") in zone (" + stealerNode.getZoneId() + ") has partitionsToSteal of " + partitionsToSteal);
        while (partitionsToSteal > 0) {
            Collections.shuffle(donorNodeKeys, new Random(System.currentTimeMillis()));
            // Repeatedly loop over donor nodes to distribute stealing
            for (Node donorNode : donorNodeKeys) {
                Node currentDonorNode = returnCluster.getNodeById(donorNode.getId());
                // Only steal from donor nodes with extra partitions
                int partitionsToDonate = currentDonorNode.getNumberOfPartitions() - donorNodes.get(donorNode);
                if (partitionsToDonate <= 0) {
                List<Integer> donorPartitions = Lists.newArrayList(currentDonorNode.getPartitionIds());
                Collections.shuffle(donorPartitions, new Random(System.currentTimeMillis()));
                for (int donorPartition : donorPartitions) {
                    Cluster intermediateCluster = UpdateClusterUtils.createUpdatedCluster(returnCluster, stealerNode.getId(), Lists.newArrayList(donorPartition));
                    returnCluster = intermediateCluster;
                    System.out.println("Stealer node " + stealerNode.getId() + ", donor node " + currentDonorNode.getId() + ", partition stolen " + donorPartition);
                    if (partitionsToSteal == 0 || partitionsToDonate == 0)
                if (partitionsToSteal == 0)
    return returnCluster;
Also used : HashMap(java.util.HashMap) Node(voldemort.cluster.Node) ArrayList(java.util.ArrayList) Cluster(voldemort.cluster.Cluster) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List)


Cluster (voldemort.cluster.Cluster)197 Test (org.junit.Test)74 StoreDefinition ( Node (voldemort.cluster.Node)72 ArrayList (java.util.ArrayList)51 HashMap (java.util.HashMap)47 ByteArray (voldemort.utils.ByteArray)33 AdminClient (voldemort.client.protocol.admin.AdminClient)26 ClusterTestUtils (voldemort.ClusterTestUtils)25 VoldemortException (voldemort.VoldemortException)24 List (java.util.List)23 ClusterMapper (voldemort.xml.ClusterMapper)23 File ( StoreDefinitionsMapper (voldemort.xml.StoreDefinitionsMapper)18 Zone (voldemort.cluster.Zone)17 Versioned (voldemort.versioning.Versioned)17 Properties (java.util.Properties)16 IOException ( VoldemortServer (voldemort.server.VoldemortServer)15 RoutingStrategyFactory (voldemort.routing.RoutingStrategyFactory)14