Search in sources :

Example 61 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class ImportTextDumpToBDB method main.

/**
 * Imports a text dump (a single file, or every file directly inside a
 * directory) into the BDB store of one node.
 *
 * Reads the options "input", "bdb", "cluster-xml", "stores-xml" and
 * "node-id". The store name is taken from the last path element of the
 * "bdb" folder and must match a store defined in the stores XML. Every
 * input line is converted with {@code lineToEntry}; the entry is written
 * only if the routing strategy lists the given node id for its key. All
 * other entries are counted but discarded. Progress is printed roughly
 * every ten seconds.
 *
 * @param argv command line arguments
 * @throws Exception if option parsing, metadata loading or the store
 *         itself fails; a {@link VoldemortException} is thrown when the
 *         store name is not present in the stores XML
 */
public static void main(String[] argv) throws Exception {
    OptionParser parser = getParser();
    OptionSet options = parser.parse(argv);
    validateOptions(options);
    String inputPath = (String) options.valueOf("input");
    String storeBdbFolderPath = (String) options.valueOf("bdb");
    String clusterXmlPath = (String) options.valueOf("cluster-xml");
    String storesXmlPath = (String) options.valueOf("stores-xml");
    Integer nodeId = (Integer) options.valueOf("node-id");
    File input = new File(inputPath);
    List<File> dataFiles = new ArrayList<File>();
    if (input.isDirectory()) {
        // listFiles() returns null on I/O error, hence the guard.
        File[] files = input.listFiles();
        if (files != null)
            Collections.addAll(dataFiles, files);
    } else if (input.isFile()) {
        dataFiles.add(input);
    } else {
        // Processing continues with an empty file list, as before; only the
        // message (which lacked a separating space) is corrected.
        System.err.println(inputPath + " is not file or directory");
    }
    File storeBdbFolder = new File(storeBdbFolderPath);
    // The BDB folder name doubles as the store name.
    final String storeName = storeBdbFolder.getName();
    Cluster cluster = new ClusterMapper().readCluster(new File(clusterXmlPath));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper().readStoreList(new File(storesXmlPath));
    StoreDefinition storeDef = null;
    for (StoreDefinition sd : storeDefs) {
        if (sd.getName() != null && sd.getName().equals(storeName)) {
            storeDef = sd;
        }
    }
    if (storeDef == null) {
        throw new VoldemortException("StoreNotfound: " + storeName);
    }
    RoutingStrategy routingStrategy = new RoutingStrategyFactory().updateRoutingStrategy(storeDef, cluster);
    Properties properties = new Properties();
    // NOTE(review): node.id is fixed to "0" regardless of the "node-id"
    // option; presumably it is only needed to build the config - confirm.
    properties.put("node.id", "0");
    properties.put("voldemort.home", storeBdbFolder.getParent());
    VoldemortConfig voldemortConfig = new VoldemortConfig(properties);
    voldemortConfig.setBdbDataDirectory(storeBdbFolder.getParent());
    voldemortConfig.setEnableJmx(false);
    voldemortConfig.setBdbOneEnvPerStore(true);
    BdbStorageConfiguration bdbConfiguration = new BdbStorageConfiguration(voldemortConfig);
    // Minimal store definition carrying only the store name; it reports no
    // memory footprint.
    class MockStoreDefinition extends StoreDefinition {

        public MockStoreDefinition() {
            super(storeName, null, null, null, null, null, null, null, 0, null, 0, null, 0, null, null, null, null, null, null, null, null, null, null, null, null, 0);
        }

        @Override
        public boolean hasMemoryFootprint() {
            return false;
        }
    }
    StoreDefinition mockStoreDef = new MockStoreDefinition();
    StorageEngine<ByteArray, byte[], byte[]> engine = bdbConfiguration.getStore(mockStoreDef, routingStrategy);
    long reportIntervalMs = 10000L;
    long lastCount = 0;
    long lastInserted = 0;
    Reporter<Boolean> rp = new Reporter<Boolean>(reportIntervalMs);
    long count = 0;
    long inserted = 0;
    try {
        for (File f : dataFiles) {
            BufferedReader bufferedReader = null;
            // Tracks whether beginBatchModifications() was reached, so that
            // endBatchModifications() is only called as its counterpart.
            boolean batchStarted = false;
            try {
                bufferedReader = new BufferedReader(new FileReader(f), READER_BUFFER_SIZE);
                engine.beginBatchModifications();
                batchStarted = true;
                while (true) {
                    String line = bufferedReader.readLine();
                    if (line == null) {
                        break;
                    }
                    Pair<ByteArray, Versioned<byte[]>> entry;
                    try {
                        entry = lineToEntry(line);
                    } catch (Exception e) {
                        // Unparseable lines are reported and skipped; they do
                        // not count towards the totals.
                        System.err.println("Skipping line: " + line);
                        e.printStackTrace();
                        continue;
                    }
                    ByteArray key = entry.getFirst();
                    List<Node> nodeList = routingStrategy.routeRequest(key.get());
                    for (Node node : nodeList) {
                        if (nodeId == node.getId()) {
                            try {
                                engine.put(key, entry.getSecond(), null);
                                inserted++;
                            } catch (ObsoleteVersionException e) {
                                e.printStackTrace();
                            }
                            break;
                        }
                    }
                    count++;
                    final Long countObject = count;
                    final Long insertedObject = inserted;
                    Boolean reported = rp.tryReport(new Callable<Boolean>() {

                        @Override
                        public Boolean call() throws Exception {
                            System.out.print(String.format("Imported %15d entries; Inserted %15d entries", countObject, insertedObject));
                            return true;
                        }
                    });
                    // A non-null result means the reporter ran the callback,
                    // so finish the progress line with the speeds.
                    if (reported != null) {
                        long importSpeed = (count - lastCount) / (reportIntervalMs / 1000);
                        long insertSpeed = (inserted - lastInserted) / (reportIntervalMs / 1000);
                        System.out.println(String.format("; ImportSpeed: %8d/s; InsertSpeed: %8d/s ", importSpeed, insertSpeed));
                        lastCount = count;
                        lastInserted = inserted;
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Close the reader on every path, not only after a clean read.
                if (bufferedReader != null) {
                    try {
                        bufferedReader.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                if (batchStarted) {
                    engine.endBatchModifications();
                }
            }
        }
    } finally {
        // Release the engine even when an unchecked exception aborts the import.
        engine.close();
    }
    System.out.println(String.format("Finished importing %d entries (%d inserted, rest discarded)", count, inserted));
}
Also used : Versioned(voldemort.versioning.Versioned) RoutingStrategyFactory(voldemort.routing.RoutingStrategyFactory) Node(voldemort.cluster.Node) OptionParser(joptsimple.OptionParser) VoldemortException(voldemort.VoldemortException) VoldemortConfig(voldemort.server.VoldemortConfig) StoreDefinition(voldemort.store.StoreDefinition) RoutingStrategy(voldemort.routing.RoutingStrategy) ByteArray(voldemort.utils.ByteArray) BdbStorageConfiguration(voldemort.store.bdb.BdbStorageConfiguration) StoreDefinitionsMapper(voldemort.xml.StoreDefinitionsMapper) Cluster(voldemort.cluster.Cluster) ClusterMapper(voldemort.xml.ClusterMapper) VoldemortException(voldemort.VoldemortException) ObsoleteVersionException(voldemort.versioning.ObsoleteVersionException) DecoderException(org.apache.commons.codec.DecoderException) ObsoleteVersionException(voldemort.versioning.ObsoleteVersionException) OptionSet(joptsimple.OptionSet)

Example 62 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class PartitionAnalysisCLI method main.

/**
 * Prints the partition balance analysis for a cluster and its stores.
 *
 * Reads the "cluster" and "stores" options as paths to the cluster XML and
 * stores XML, loads both, and writes the resulting {@code PartitionBalance}
 * to standard output.
 *
 * @param args command line arguments
 * @throws Exception if option handling or reading either XML file fails
 */
public static void main(String[] args) throws Exception {
    setupParser();
    final OptionSet parsedOptions = getValidOptions(args);
    final String clusterPath = (String) parsedOptions.valueOf("cluster");
    final String storesPath = (String) parsedOptions.valueOf("stores");

    // Load the cluster topology first, then the store definitions.
    final ClusterMapper clusterMapper = new ClusterMapper();
    final Cluster cluster = clusterMapper.readCluster(new File(clusterPath));
    final StoreDefinitionsMapper storesMapper = new StoreDefinitionsMapper();
    final List<StoreDefinition> stores = storesMapper.readStoreList(new File(storesPath));

    final PartitionBalance balance = new PartitionBalance(cluster, stores);
    System.out.println(balance);
}
Also used : StoreDefinition(voldemort.store.StoreDefinition) StoreDefinitionsMapper(voldemort.xml.StoreDefinitionsMapper) Cluster(voldemort.cluster.Cluster) ClusterMapper(voldemort.xml.ClusterMapper) OptionSet(joptsimple.OptionSet) File(java.io.File)

Example 63 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class Repartitioner method repeatedlyBalanceContiguousPartitionsPerZone.

/**
 * Alternates, for a fixed number of rounds, between breaking up contiguous
 * runs of partitions within zones and re-balancing primary partitions.
 * Each re-balancing step may re-introduce contiguous runs, which is why
 * the two steps are repeated rather than run once.
 *
 * @param nextCandidateCluster cluster to start from
 * @param maxContiguousPartitionsPerZone limit handed to
 *        balanceContiguousPartitionsPerZone in every round. See
 *        RebalanceCLI.
 * @return cluster produced by the last round
 */
public static Cluster repeatedlyBalanceContiguousPartitionsPerZone(final Cluster nextCandidateCluster, final int maxContiguousPartitionsPerZone) {
    System.out.println("Looping to evenly balance partitions across zones while limiting contiguous partitions");
    // There is no definitive stop condition: for small clusters and/or
    // clusters with few partitions it may be impossible to reach a tight
    // limit on contiguous run lengths. A constant number of rounds is run
    // instead; once the goal is reached the remaining rounds are no-ops.
    final int totalRounds = 10;
    Cluster balanced = nextCandidateCluster;
    for (int round = 1; round <= totalRounds; round++) {
        Cluster withRunsBroken = balanceContiguousPartitionsPerZone(balanced, maxContiguousPartitionsPerZone);
        // Zone balancing is switched off (false) for this inner step.
        balanced = balancePrimaryPartitions(withRunsBroken, false);
        System.out.println("Completed round of balancing contiguous partitions: round " + round + " of " + totalRounds);
    }
    return balanced;
}
Also used : Cluster(voldemort.cluster.Cluster)

Example 64 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class Repartitioner method repartition.

/**
 * Produces a better-balanced assignment of partition IDs to nodes by
 * running several optional algorithms, repeating the whole pipeline
 * {@code attempts} times and keeping the candidate with the lowest
 * utility value.
 *
 * Pipeline applied to each attempt, always starting from the interim
 * cluster:
 * <ul>
 * <li>Break up contiguous runs of partition IDs within a zone (only when
 * maxContiguousPartitionsPerZone is positive).
 * <li>Balance primary partitions among nodes, and optionally among zones.
 * <li>Random swaps of partition IDs among nodes.
 * <li>Greedy swaps of partition IDs among nodes.
 * </ul>
 *
 * Intended use cases:
 * <ul>
 * <li>Shuffling : redistribute partition IDs of an existing cluster.
 * <li>Cluster expansion : spread partition IDs onto newly added nodes.
 * <li>Zone expansion : spread partition IDs into a new zone.
 * </ul>
 *
 * The analysis of the current cluster, every candidate that ties or beats
 * the best so far, and the final result are dumped into outputDir.
 *
 * @param currentCluster current cluster
 * @param currentStoreDefs current store defs
 * @param interimCluster interim cluster; needed for cluster or zone
 *        expansion, otherwise pass in same as currentCluster.
 * @param finalStoreDefs final store defs; needed for zone expansion,
 *        otherwise pass in same as currentStoreDefs.
 * @param outputDir directory in which cluster xml and analysis files are
 *        dumped
 * @param attempts number of distinct repartitionings to try; if it is not
 *        positive the interim cluster is returned unchanged
 * @param disableNodeBalancing skips the step that balances primaries
 *        among nodes
 * @param disableZoneBalancing within the node balancing step, disables
 *        balancing primaries among zones
 * @param enableRandomSwaps enables the random swap step
 * @param randomSwapAttempts passed through to randomShufflePartitions
 * @param randomSwapSuccesses passed through to randomShufflePartitions
 * @param randomSwapZoneIds passed through to randomShufflePartitions
 * @param enableGreedySwaps enables the greedy swap step
 * @param greedySwapAttempts passed through to greedyShufflePartitions
 * @param greedySwapMaxPartitionsPerNode passed through to
 *        greedyShufflePartitions
 * @param greedySwapMaxPartitionsPerZone passed through to
 *        greedyShufflePartitions
 * @param greedySwapZoneIds passed through to greedyShufflePartitions
 * @param maxContiguousPartitionsPerZone limit on contiguous partitions per
 *        zone; a value of zero or less skips that step
 * @return the best candidate cluster found, derived from the interim
 *         cluster
 */
public static Cluster repartition(final Cluster currentCluster, final List<StoreDefinition> currentStoreDefs, final Cluster interimCluster, final List<StoreDefinition> finalStoreDefs, final String outputDir, final int attempts, final boolean disableNodeBalancing, final boolean disableZoneBalancing, final boolean enableRandomSwaps, final int randomSwapAttempts, final int randomSwapSuccesses, final List<Integer> randomSwapZoneIds, final boolean enableGreedySwaps, final int greedySwapAttempts, final int greedySwapMaxPartitionsPerNode, final int greedySwapMaxPartitionsPerZone, final List<Integer> greedySwapZoneIds, final int maxContiguousPartitionsPerZone) {
    final String separator = "-------------------------\n";

    // Record the starting point before any optimization is attempted.
    PartitionBalance balance = new PartitionBalance(currentCluster, currentStoreDefs);
    RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.currentClusterFileName, balance);

    Cluster bestCluster = interimCluster;
    double bestUtility = Double.MAX_VALUE;
    for (int attempt = 0; attempt < attempts; attempt++) {
        // Every attempt starts over from the interim cluster.
        Cluster candidate = interimCluster;
        if (maxContiguousPartitionsPerZone > 0) {
            candidate = repeatedlyBalanceContiguousPartitionsPerZone(candidate, maxContiguousPartitionsPerZone);
        }
        if (!disableNodeBalancing) {
            final boolean balanceZones = !disableZoneBalancing;
            candidate = balancePrimaryPartitions(candidate, balanceZones);
        }
        if (enableRandomSwaps) {
            candidate = randomShufflePartitions(candidate, randomSwapAttempts, randomSwapSuccesses, randomSwapZoneIds, finalStoreDefs);
        }
        if (enableGreedySwaps) {
            candidate = greedyShufflePartitions(candidate, greedySwapAttempts, greedySwapMaxPartitionsPerNode, greedySwapMaxPartitionsPerZone, greedySwapZoneIds, finalStoreDefs);
        }
        RebalanceUtils.validateCurrentFinalCluster(currentCluster, candidate);

        System.out.println(separator);
        balance = new PartitionBalance(candidate, finalStoreDefs);
        final double candidateUtility = balance.getUtility();
        System.out.println("Optimization number " + attempt + ": " + candidateUtility + " max/min ratio");
        System.out.println(separator);
        System.out.println(PartitionBalanceUtils.analyzeInvalidMetadataRate(interimCluster, finalStoreDefs, candidate, finalStoreDefs));

        // Ties replace the previous best, so the latest equal candidate wins.
        if (candidateUtility <= bestUtility) {
            bestUtility = candidateUtility;
            bestCluster = candidate;
            final String attemptFileName = RebalanceUtils.finalClusterFileName + attempt;
            RebalanceUtils.dumpClusterToFile(outputDir, attemptFileName, bestCluster);
            RebalanceUtils.dumpAnalysisToFile(outputDir, attemptFileName, balance);
        }
        System.out.println(separator);
    }

    System.out.println("\n==========================");
    System.out.println("Final distribution");
    balance = new PartitionBalance(bestCluster, finalStoreDefs);
    System.out.println(balance);
    RebalanceUtils.dumpClusterToFile(outputDir, RebalanceUtils.finalClusterFileName, bestCluster);
    RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.finalClusterFileName, balance);
    return bestCluster;
}
Also used : Cluster(voldemort.cluster.Cluster)

Example 65 with Cluster

use of voldemort.cluster.Cluster in project voldemort by voldemort.

the class Repartitioner method balancePrimaryPartitions.

/**
 * This method balances primary partitions among nodes within a zone, and
 * optionally primary partitions among zones. The balancing is done at the
 * level of partitionIds. Such partition Id movement may, or may not, result
 * in data movement during a rebalancing. See RebalancePlan for the object
 * responsible for determining which partition-stores move where for a
 * specific repartitioning.
 *
 * Stealer nodes, donor nodes and the partitions taken from each donor are
 * visited in random order, so repeated calls can yield different results.
 * If a stealer still needs partitions but no donor has any surplus left,
 * a message is printed and that stealer is left short instead of looping
 * forever.
 *
 * @param nextCandidateCluster cluster to balance; the result is built from
 *        a clone of it
 * @param balanceZones indicates whether or not number of primary partitions
 *        per zone should be balanced. When false, each zone keeps its
 *        current number of partitions.
 * @return updated cluster
 */
public static Cluster balancePrimaryPartitions(final Cluster nextCandidateCluster, boolean balanceZones) {
    System.out.println("Balance number of partitions across all nodes and zones.");
    Map<Integer, Integer> targetPartitionsPerZone;
    if (balanceZones) {
        targetPartitionsPerZone = Utils.distributeEvenlyIntoMap(nextCandidateCluster.getZoneIds(), nextCandidateCluster.getNumberOfPartitions());
        System.out.println("numPartitionsPerZone");
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            System.out.println(zoneId + " : " + nextCandidateCluster.getNumberOfPartitionsInZone(zoneId) + " -> " + targetPartitionsPerZone.get(zoneId));
        }
        System.out.println("numNodesPerZone");
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            System.out.println(zoneId + " : " + nextCandidateCluster.getNumberOfNodesInZone(zoneId));
        }
    } else {
        // Keep number of partitions per zone the same.
        targetPartitionsPerZone = new HashMap<Integer, Integer>();
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            targetPartitionsPerZone.put(zoneId, nextCandidateCluster.getNumberOfPartitionsInZone(zoneId));
        }
    }
    HashMap<Integer, List<Integer>> numPartitionsPerNodeByZone = getBalancedNumberOfPrimaryPartitionsPerNode(nextCandidateCluster, targetPartitionsPerZone);
    Pair<HashMap<Node, Integer>, HashMap<Node, Integer>> donorsAndStealers = getDonorsAndStealersForBalance(nextCandidateCluster, numPartitionsPerNodeByZone);
    HashMap<Node, Integer> donorNodes = donorsAndStealers.getFirst();
    List<Node> donorNodeKeys = new ArrayList<Node>(donorNodes.keySet());
    HashMap<Node, Integer> stealerNodes = donorsAndStealers.getSecond();
    List<Node> stealerNodeKeys = new ArrayList<Node>(stealerNodes.keySet());
    /*
     * There is no "intelligence" here about which partition IDs are moved
     * where. The RebalancePlan object owns determining how to move data
     * around to meet a specific repartitioning. That said, a little bit of
     * intelligence here may go a long way. For example, for zone expansion
     * data could be minimized by:
     *
     * (1) Selecting a minimal # of partition IDs for the new zone to
     * minimize how much the ring in existing zones is perturbed;
     *
     * (2) Selecting partitions for the new zone from contiguous runs of
     * partition IDs in other zones that are not currently n-ary partitions
     * for other primary partitions;
     *
     * (3) Some combination of (1) and (2)...
     */
    // Go over every stealerNode and steal partition Ids from donor nodes
    Cluster returnCluster = Cluster.cloneCluster(nextCandidateCluster);
    // One generator for all shuffles: re-seeding from the clock for each
    // shuffle gives identical seeds within the same millisecond and thus
    // correlated orderings.
    final Random random = new Random(System.currentTimeMillis());
    Collections.shuffle(stealerNodeKeys, random);
    for (Node stealerNode : stealerNodeKeys) {
        int partitionsToSteal = stealerNodes.get(stealerNode) - stealerNode.getNumberOfPartitions();
        System.out.println("Node (" + stealerNode.getId() + ") in zone (" + stealerNode.getZoneId() + ") has partitionsToSteal of " + partitionsToSteal);
        while (partitionsToSteal > 0) {
            // Set once any partition moves during this pass over the donors.
            boolean stoleInThisPass = false;
            Collections.shuffle(donorNodeKeys, random);
            // Repeatedly loop over donor nodes to distribute stealing
            for (Node donorNode : donorNodeKeys) {
                // Look the donor up in the evolving cluster so earlier
                // donations are reflected in its partition count.
                Node currentDonorNode = returnCluster.getNodeById(donorNode.getId());
                // Only steal from donor nodes with extra partitions
                int partitionsToDonate = currentDonorNode.getNumberOfPartitions() - donorNodes.get(donorNode);
                if (partitionsToDonate <= 0) {
                    continue;
                }
                List<Integer> donorPartitions = Lists.newArrayList(currentDonorNode.getPartitionIds());
                Collections.shuffle(donorPartitions, random);
                for (int donorPartition : donorPartitions) {
                    Cluster intermediateCluster = UpdateClusterUtils.createUpdatedCluster(returnCluster, stealerNode.getId(), Lists.newArrayList(donorPartition));
                    returnCluster = intermediateCluster;
                    partitionsToSteal--;
                    partitionsToDonate--;
                    stoleInThisPass = true;
                    System.out.println("Stealer node " + stealerNode.getId() + ", donor node " + currentDonorNode.getId() + ", partition stolen " + donorPartition);
                    if (partitionsToSteal == 0 || partitionsToDonate == 0)
                        break;
                }
                if (partitionsToSteal == 0)
                    break;
            }
            if (!stoleInThisPass) {
                // No donor has surplus partitions left; another pass cannot
                // make progress, so stop instead of spinning forever.
                System.out.println("Node (" + stealerNode.getId() + ") could not steal remaining " + partitionsToSteal + " partition(s): no donor node has partitions left to donate");
                break;
            }
        }
    }
    return returnCluster;
}
Also used : HashMap(java.util.HashMap) Node(voldemort.cluster.Node) ArrayList(java.util.ArrayList) Cluster(voldemort.cluster.Cluster) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

Cluster (voldemort.cluster.Cluster)197 Test (org.junit.Test)74 StoreDefinition (voldemort.store.StoreDefinition)74 Node (voldemort.cluster.Node)72 ArrayList (java.util.ArrayList)51 HashMap (java.util.HashMap)47 ByteArray (voldemort.utils.ByteArray)33 AdminClient (voldemort.client.protocol.admin.AdminClient)26 ClusterTestUtils (voldemort.ClusterTestUtils)25 VoldemortException (voldemort.VoldemortException)24 List (java.util.List)23 ClusterMapper (voldemort.xml.ClusterMapper)23 File (java.io.File)20 StoreDefinitionsMapper (voldemort.xml.StoreDefinitionsMapper)18 Zone (voldemort.cluster.Zone)17 Versioned (voldemort.versioning.Versioned)17 Properties (java.util.Properties)16 IOException (java.io.IOException)15 VoldemortServer (voldemort.server.VoldemortServer)15 RoutingStrategyFactory (voldemort.routing.RoutingStrategyFactory)14