use of voldemort.cluster.Cluster in project voldemort by voldemort.
the class ImportTextDumpToBDB method main.
/**
 * Imports text dump file(s) into a BDB store.
 *
 * Reads the "input", "bdb", "cluster-xml", "stores-xml" and "node-id"
 * options. The store name is taken from the name of the BDB folder and must
 * match a store in the stores xml. Every input line is converted with
 * lineToEntry; lines that fail to convert are reported and skipped. An entry
 * is written to the engine only if the node with the given node id is in the
 * list the routing strategy returns for the entry's key.
 *
 * @param argv command line arguments
 * @throws Exception on option, metadata or storage errors that are not
 *         handled per file (IOExceptions while reading a data file are
 *         printed and the import moves on to the next file)
 */
public static void main(String[] argv) throws Exception {
    OptionParser parser = getParser();
    OptionSet options = parser.parse(argv);
    validateOptions(options);
    String inputPath = (String) options.valueOf("input");
    String storeBdbFolderPath = (String) options.valueOf("bdb");
    String clusterXmlPath = (String) options.valueOf("cluster-xml");
    String storesXmlPath = (String) options.valueOf("stores-xml");
    Integer nodeId = (Integer) options.valueOf("node-id");

    // Collect the data files: either every file of a directory or one file.
    File input = new File(inputPath);
    List<File> dataFiles = new ArrayList<File>();
    if (input.isDirectory()) {
        File[] files = input.listFiles();
        if (files != null)
            Collections.addAll(dataFiles, files);
    } else if (input.isFile()) {
        dataFiles.add(input);
    } else {
        System.err.println(inputPath + " is not file or directory");
    }

    // The BDB folder name doubles as the store name.
    File storeBdbFolder = new File(storeBdbFolderPath);
    final String storeName = storeBdbFolder.getName();
    Cluster cluster = new ClusterMapper().readCluster(new File(clusterXmlPath));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper().readStoreList(new File(storesXmlPath));
    StoreDefinition storeDef = null;
    for (StoreDefinition sd : storeDefs) {
        if (sd.getName() != null && sd.getName().equals(storeName)) {
            storeDef = sd;
        }
    }
    if (storeDef == null) {
        throw new VoldemortException("StoreNotfound: " + storeName);
    }
    RoutingStrategy routingStrategy = new RoutingStrategyFactory().updateRoutingStrategy(storeDef, cluster);

    // Configure a BDB environment rooted at the parent of the store folder.
    Properties properties = new Properties();
    properties.put("node.id", "0");
    properties.put("voldemort.home", storeBdbFolder.getParent());
    VoldemortConfig voldemortConfig = new VoldemortConfig(properties);
    voldemortConfig.setBdbDataDirectory(storeBdbFolder.getParent());
    voldemortConfig.setEnableJmx(false);
    voldemortConfig.setBdbOneEnvPerStore(true);
    BdbStorageConfiguration bdbConfiguration = new BdbStorageConfiguration(voldemortConfig);

    // Minimal store definition used only to open the engine: it carries the
    // store name and reports no memory footprint.
    class MockStoreDefinition extends StoreDefinition {
        public MockStoreDefinition() {
            super(storeName, null, null, null, null, null, null, null, 0, null, 0, null, 0, null, null, null, null, null, null, null, null, null, null, null, null, 0);
        }

        @Override
        public boolean hasMemoryFootprint() {
            return false;
        }
    }
    StoreDefinition mockStoreDef = new MockStoreDefinition();
    StorageEngine<ByteArray, byte[], byte[]> engine = bdbConfiguration.getStore(mockStoreDef, routingStrategy);

    long reportIntervalMs = 10000L;
    long lastCount = 0;
    long lastInserted = 0;
    Reporter<Boolean> rp = new Reporter<Boolean>(reportIntervalMs);
    long count = 0;
    long inserted = 0;
    for (File f : dataFiles) {
        BufferedReader bufferedReader = null;
        // Tracks whether a batch was opened so that only an opened batch is
        // ended, e.g. not when the file could not be opened at all.
        boolean batchStarted = false;
        try {
            bufferedReader = new BufferedReader(new FileReader(f), READER_BUFFER_SIZE);
            engine.beginBatchModifications();
            batchStarted = true;
            while (true) {
                String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                Pair<ByteArray, Versioned<byte[]>> entry;
                try {
                    entry = lineToEntry(line);
                } catch (Exception e) {
                    System.err.println("Skipping line: " + line);
                    e.printStackTrace();
                    continue;
                }
                ByteArray key = entry.getFirst();
                List<Node> nodeList = routingStrategy.routeRequest(key.get());
                for (Node node : nodeList) {
                    // Compare numerically; == on two boxed Integers would
                    // compare references.
                    if (nodeId.intValue() == node.getId()) {
                        try {
                            engine.put(key, entry.getSecond(), null);
                            inserted++;
                        } catch (ObsoleteVersionException e) {
                            e.printStackTrace();
                        }
                        break;
                    }
                }
                count++;
                final Long countObject = count;
                final Long insertedObject = inserted;
                Boolean reported = rp.tryReport(new Callable<Boolean>() {

                    @Override
                    public Boolean call() throws Exception {
                        System.out.print(String.format("Imported %15d entries; Inserted %15d entries", countObject, insertedObject));
                        return true;
                    }
                });
                if (reported != null) {
                    // Rates are averaged over the nominal report interval.
                    long importSpeed = (count - lastCount) / (reportIntervalMs / 1000);
                    long insertSpeed = (inserted - lastInserted) / (reportIntervalMs / 1000);
                    System.out.println(String.format("; ImportSpeed: %8d/s; InsertSpeed: %8d/s ", importSpeed, insertSpeed));
                    lastCount = count;
                    lastInserted = inserted;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the reader, also when reading or inserting failed.
            if (bufferedReader != null) {
                try {
                    bufferedReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (batchStarted) {
                engine.endBatchModifications();
            }
        }
    }
    engine.close();
    System.out.println(String.format("Finished importing %d entries (%d inserted, rest discarded)", count, inserted));
}
use of voldemort.cluster.Cluster in project voldemort by voldemort.
the class PartitionAnalysisCLI method main.
/**
 * Command line entry point: reads the cluster xml and stores xml named by
 * the "cluster" and "stores" options, builds a PartitionBalance from them
 * and prints it to standard output.
 *
 * @param args command line arguments
 * @throws Exception if option handling or reading either file fails
 */
public static void main(String[] args) throws Exception {
    setupParser();
    final OptionSet parsedOptions = getValidOptions(args);

    final String clusterXmlPath = (String) parsedOptions.valueOf("cluster");
    final String storesXmlPath = (String) parsedOptions.valueOf("stores");

    final ClusterMapper clusterMapper = new ClusterMapper();
    final Cluster cluster = clusterMapper.readCluster(new File(clusterXmlPath));

    final StoreDefinitionsMapper storesMapper = new StoreDefinitionsMapper();
    final List<StoreDefinition> stores = storesMapper.readStoreList(new File(storesXmlPath));

    System.out.println(new PartitionBalance(cluster, stores));
}
use of voldemort.cluster.Cluster in project voldemort by voldemort.
the class Repartitioner method repeatedlyBalanceContiguousPartitionsPerZone.
/**
 * Alternates, for a fixed number of rounds, between breaking up contiguous
 * runs of partitions and re-balancing primary partitions (without balancing
 * across zones). The re-balancing step may re-introduce contiguous runs in
 * another zone, which is why the two steps are repeated.
 *
 * @param nextCandidateCluster cluster to start from
 * @param maxContiguousPartitionsPerZone See RebalanceCLI.
 * @return updated cluster
 */
public static Cluster repeatedlyBalanceContiguousPartitionsPerZone(final Cluster nextCandidateCluster, final int maxContiguousPartitionsPerZone) {
    System.out.println("Looping to evenly balance partitions across zones while limiting contiguous partitions");

    // There is no definitive stopping condition: for small clusters and/or
    // clusters with few partitions, tight limits on contiguous run lengths
    // may be unreachable. Hence a constant number of rounds; once the goal
    // is reached, further rounds are no-ops.
    final int totalRounds = 10;

    Cluster balanced = nextCandidateCluster;
    int round = 1;
    while (round <= totalRounds) {
        Cluster runsBrokenUp = balanceContiguousPartitionsPerZone(balanced, maxContiguousPartitionsPerZone);
        balanced = balancePrimaryPartitions(runsBrokenUp, false);
        System.out.println("Completed round of balancing contiguous partitions: round " + round + " of " + totalRounds);
        round++;
    }
    return balanced;
}
use of voldemort.cluster.Cluster in project voldemort by voldemort.
the class Repartitioner method repartition.
/**
 * Applies a sequence of balancing algorithms to the interim cluster, several
 * times over, and returns the candidate with the lowest utility value as
 * reported by PartitionBalance.
 *
 * Each attempt starts again from the interim cluster and runs, in order and
 * only when enabled:
 * <ul>
 * <li>Breaking up contiguous runs of partition IDs within a zone (when
 * maxContiguousPartitionsPerZone is positive).
 * <li>Balancing primary partitions among nodes, and optionally among zones.
 * <li>Random swaps of partition IDs among nodes.
 * <li>Greedy swaps of partition IDs among nodes.
 * </ul>
 *
 * Use cases:
 * <ul>
 * <li>Shuffling : better distribution of partition IDs in an existing
 * cluster.
 * <li>Cluster expansion : spread partition IDs onto newly added nodes.
 * <li>Zone expansion : spread partition IDs into a new zone.
 * </ul>
 *
 * Side effects: an analysis of the current cluster, every candidate that is
 * at least as good as the best so far, and the final result are dumped to
 * outputDir; progress is printed to standard output.
 *
 * @param currentCluster current cluster
 * @param currentStoreDefs current store defs
 * @param interimCluster interim cluster; needed for cluster or zone
 *        expansion, otherwise pass in same as currentCluster.
 * @param finalStoreDefs final store defs; needed for zone expansion,
 *        otherwise pass in same as currentStoreDefs.
 * @param outputDir Directory in which to dump cluster xml and analysis
 *        files.
 * @param attempts Number of distinct repartitionings to attempt, the best
 *        of which is returned.
 * @param disableNodeBalancing Disables the step that balances primaries
 *        among nodes within each zone.
 * @param disableZoneBalancing For the primary balancing step, disables
 *        balancing primaries among zones.
 * @param enableRandomSwaps Enables random swap optimization.
 * @param randomSwapAttempts passed through to randomShufflePartitions
 * @param randomSwapSuccesses passed through to randomShufflePartitions
 * @param randomSwapZoneIds passed through to randomShufflePartitions
 * @param enableGreedySwaps Enables greedy swap optimization.
 * @param greedySwapAttempts passed through to greedyShufflePartitions
 * @param greedySwapMaxPartitionsPerNode passed through to
 *        greedyShufflePartitions
 * @param greedySwapMaxPartitionsPerZone passed through to
 *        greedyShufflePartitions
 * @param greedySwapZoneIds passed through to greedyShufflePartitions
 * @param maxContiguousPartitionsPerZone passed through to
 *        repeatedlyBalanceContiguousPartitionsPerZone; that step is skipped
 *        unless the value is greater than zero
 * @return "final cluster" that has had all specified balancing algorithms
 *         run against it. The number of zones and number of nodes will
 *         match that of the specified "interim cluster".
 */
public static Cluster repartition(final Cluster currentCluster, final List<StoreDefinition> currentStoreDefs, final Cluster interimCluster, final List<StoreDefinition> finalStoreDefs, final String outputDir, final int attempts, final boolean disableNodeBalancing, final boolean disableZoneBalancing, final boolean enableRandomSwaps, final int randomSwapAttempts, final int randomSwapSuccesses, final List<Integer> randomSwapZoneIds, final boolean enableGreedySwaps, final int greedySwapAttempts, final int greedySwapMaxPartitionsPerNode, final int greedySwapMaxPartitionsPerZone, final List<Integer> greedySwapZoneIds, final int maxContiguousPartitionsPerZone) {
    final String divider = "-------------------------\n";

    // Record the starting point before any balancing is attempted.
    PartitionBalance initialBalance = new PartitionBalance(currentCluster, currentStoreDefs);
    RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.currentClusterFileName, initialBalance);

    Cluster bestCluster = interimCluster;
    double bestUtility = Double.MAX_VALUE;

    int attempt = 0;
    while (attempt < attempts) {
        // Every attempt starts over from the interim cluster.
        Cluster candidate = interimCluster;

        if (maxContiguousPartitionsPerZone > 0) {
            candidate = repeatedlyBalanceContiguousPartitionsPerZone(candidate, maxContiguousPartitionsPerZone);
        }

        if (!disableNodeBalancing) {
            candidate = balancePrimaryPartitions(candidate, !disableZoneBalancing);
        }

        if (enableRandomSwaps) {
            candidate = randomShufflePartitions(candidate, randomSwapAttempts, randomSwapSuccesses, randomSwapZoneIds, finalStoreDefs);
        }

        if (enableGreedySwaps) {
            candidate = greedyShufflePartitions(candidate, greedySwapAttempts, greedySwapMaxPartitionsPerNode, greedySwapMaxPartitionsPerZone, greedySwapZoneIds, finalStoreDefs);
        }

        RebalanceUtils.validateCurrentFinalCluster(currentCluster, candidate);

        System.out.println(divider);
        PartitionBalance candidateBalance = new PartitionBalance(candidate, finalStoreDefs);
        double candidateUtility = candidateBalance.getUtility();
        System.out.println("Optimization number " + attempt + ": " + candidateUtility + " max/min ratio");
        System.out.println(divider);
        System.out.println(PartitionBalanceUtils.analyzeInvalidMetadataRate(interimCluster, finalStoreDefs, candidate, finalStoreDefs));

        // Ties replace the previous best, so the latest equally good
        // candidate wins.
        if (candidateUtility <= bestUtility) {
            bestUtility = candidateUtility;
            bestCluster = candidate;
            RebalanceUtils.dumpClusterToFile(outputDir, RebalanceUtils.finalClusterFileName + attempt, bestCluster);
            RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.finalClusterFileName + attempt, candidateBalance);
        }
        System.out.println(divider);

        attempt++;
    }

    System.out.println("\n==========================");
    System.out.println("Final distribution");
    PartitionBalance finalBalance = new PartitionBalance(bestCluster, finalStoreDefs);
    System.out.println(finalBalance);

    RebalanceUtils.dumpClusterToFile(outputDir, RebalanceUtils.finalClusterFileName, bestCluster);
    RebalanceUtils.dumpAnalysisToFile(outputDir, RebalanceUtils.finalClusterFileName, finalBalance);
    return bestCluster;
}
use of voldemort.cluster.Cluster in project voldemort by voldemort.
the class Repartitioner method balancePrimaryPartitions.
/**
 * This method balances primary partitions among nodes within a zone, and
 * optionally primary partitions among zones. The balancing is done at the
 * level of partitionIds. Such partition Id movement may, or may not, result
 * in data movement during a rebalancing. See RebalancePlan for the object
 * responsible for determining which partition-stores move where for a
 * specific repartitioning.
 *
 * Stealer nodes, donor nodes and donated partitions are all visited in
 * random order, so repeated calls on the same input may return different
 * clusters. The input cluster is not modified; a clone is updated instead.
 *
 * @param nextCandidateCluster cluster to balance
 * @param balanceZones indicates whether or not number of primary partitions
 *        per zone should be balanced. If false, every zone keeps its
 *        current number of partitions.
 * @return updated cluster
 * @throws IllegalStateException if a stealer node still needs partitions
 *         but a full pass over the donor nodes yields none (previously
 *         this situation looped forever)
 */
public static Cluster balancePrimaryPartitions(final Cluster nextCandidateCluster, boolean balanceZones) {
    System.out.println("Balance number of partitions across all nodes and zones.");
    Map<Integer, Integer> targetPartitionsPerZone;
    if (balanceZones) {
        targetPartitionsPerZone = Utils.distributeEvenlyIntoMap(nextCandidateCluster.getZoneIds(), nextCandidateCluster.getNumberOfPartitions());
        System.out.println("numPartitionsPerZone");
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            System.out.println(zoneId + " : " + nextCandidateCluster.getNumberOfPartitionsInZone(zoneId) + " -> " + targetPartitionsPerZone.get(zoneId));
        }
        System.out.println("numNodesPerZone");
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            System.out.println(zoneId + " : " + nextCandidateCluster.getNumberOfNodesInZone(zoneId));
        }
    } else {
        // Keep number of partitions per zone the same.
        targetPartitionsPerZone = new HashMap<Integer, Integer>();
        for (int zoneId : nextCandidateCluster.getZoneIds()) {
            targetPartitionsPerZone.put(zoneId, nextCandidateCluster.getNumberOfPartitionsInZone(zoneId));
        }
    }
    HashMap<Integer, List<Integer>> numPartitionsPerNodeByZone = getBalancedNumberOfPrimaryPartitionsPerNode(nextCandidateCluster, targetPartitionsPerZone);
    Pair<HashMap<Node, Integer>, HashMap<Node, Integer>> donorsAndStealers = getDonorsAndStealersForBalance(nextCandidateCluster, numPartitionsPerNodeByZone);
    HashMap<Node, Integer> donorNodes = donorsAndStealers.getFirst();
    List<Node> donorNodeKeys = new ArrayList<Node>(donorNodes.keySet());
    HashMap<Node, Integer> stealerNodes = donorsAndStealers.getSecond();
    List<Node> stealerNodeKeys = new ArrayList<Node>(stealerNodes.keySet());
    /*
     * There is no "intelligence" here about which partition IDs are moved
     * where. The RebalancePlan object owns determining how to move data
     * around to meet a specific repartitioning. That said, a little bit of
     * intelligence here may go a long way. For example, for zone expansion
     * data could be minimized by:
     *
     * (1) Selecting a minimal # of partition IDs for the new zone to
     * minimize how much the ring in existing zones is perturbed;
     *
     * (2) Selecting partitions for the new zone from contiguous runs of
     * partition IDs in other zones that are not currently n-ary partitions
     * for other primary partitions;
     *
     * (3) Some combination of (1) and (2)...
     */
    // Go over every stealerNode and steal partition Ids from donor nodes
    Cluster returnCluster = Cluster.cloneCluster(nextCandidateCluster);
    // One generator for all shuffles: creating a new time-seeded Random per
    // shuffle gives identical seeds for shuffles in the same millisecond.
    final Random random = new Random();
    Collections.shuffle(stealerNodeKeys, random);
    for (Node stealerNode : stealerNodeKeys) {
        // Target count for this node minus what it currently owns.
        int partitionsToSteal = stealerNodes.get(stealerNode) - stealerNode.getNumberOfPartitions();
        System.out.println("Node (" + stealerNode.getId() + ") in zone (" + stealerNode.getZoneId() + ") has partitionsToSteal of " + partitionsToSteal);
        while (partitionsToSteal > 0) {
            final int remainingBeforePass = partitionsToSteal;
            Collections.shuffle(donorNodeKeys, random);
            // Repeatedly loop over donor nodes to distribute stealing
            for (Node donorNode : donorNodeKeys) {
                // Look the donor up in the evolving cluster so partitions
                // already given away are accounted for.
                Node currentDonorNode = returnCluster.getNodeById(donorNode.getId());
                // Only steal from donor nodes with extra partitions
                int partitionsToDonate = currentDonorNode.getNumberOfPartitions() - donorNodes.get(donorNode);
                if (partitionsToDonate <= 0) {
                    continue;
                }
                List<Integer> donorPartitions = Lists.newArrayList(currentDonorNode.getPartitionIds());
                Collections.shuffle(donorPartitions, random);
                for (int donorPartition : donorPartitions) {
                    Cluster intermediateCluster = UpdateClusterUtils.createUpdatedCluster(returnCluster, stealerNode.getId(), Lists.newArrayList(donorPartition));
                    returnCluster = intermediateCluster;
                    partitionsToSteal--;
                    partitionsToDonate--;
                    System.out.println("Stealer node " + stealerNode.getId() + ", donor node " + currentDonorNode.getId() + ", partition stolen " + donorPartition);
                    if (partitionsToSteal == 0 || partitionsToDonate == 0)
                        break;
                }
                if (partitionsToSteal == 0)
                    break;
            }
            // A pass that stole nothing leaves all state unchanged, so
            // every further pass would be fruitless as well.
            if (partitionsToSteal == remainingBeforePass) {
                throw new IllegalStateException("Unable to steal " + partitionsToSteal + " more partition(s) for node " + stealerNode.getId() + ": no donor node has partitions left to donate");
            }
        }
    }
    return returnCluster;
}
Aggregations