Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
The class MoveReplicaCmd, method moveReplica.
private void moveReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
  log.info("moveReplica() : {}", Utils.toJSONString(message));
  ocmh.checkRequired(message, COLLECTION_PROP, "targetNode");
  String collection = message.getStr(COLLECTION_PROP);
  String targetNode = message.getStr("targetNode");
  String async = message.getStr(ASYNC);
  DocCollection coll = clusterState.getCollection(collection);
  if (coll == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
  }
  Replica replica = null;
  if (message.containsKey(REPLICA_PROP)) {
    String replicaName = message.getStr(REPLICA_PROP);
    replica = coll.getReplica(replicaName);
    if (replica == null) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " replica: " + replicaName + " does not exist");
    }
  } else {
    ocmh.checkRequired(message, SHARD_ID_PROP, "fromNode");
    String fromNode = message.getStr("fromNode");
    String shardId = message.getStr(SHARD_ID_PROP);
    Slice slice = coll.getSlice(shardId);
    // shuffle, then iterate the shuffled list so that a random replica on fromNode is picked
    List<Replica> sliceReplicas = new ArrayList<>(slice.getReplicas());
    Collections.shuffle(sliceReplicas, RANDOM);
    for (Replica r : sliceReplicas) {
      if (r.getNodeName().equals(fromNode)) {
        replica = r;
        break;
      }
    }
    if (replica == null) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " node: " + fromNode + " does not have any replica belonging to shard: " + shardId);
    }
  }
  log.info("Replica will be moved: {}", replica);
  Slice slice = null;
  for (Slice s : coll.getSlices()) {
    if (s.getReplicas().contains(replica)) {
      slice = s;
      break;
    }
  }
  assert slice != null;
  Object dataDir = replica.get("dataDir");
  if (dataDir != null && dataDir.toString().startsWith("hdfs:/")) {
    moveHdfsReplica(clusterState, results, dataDir.toString(), targetNode, async, coll, replica, slice);
  } else {
    moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice);
  }
}
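The handler accepts two message shapes, branching on whether REPLICA_PROP is present. As a minimal sketch of both shapes (the collection, shard, replica, and node names below are illustrative, not taken from the source), the overseer message could be built with ZkNodeProps like this:

import org.apache.solr.common.cloud.ZkNodeProps;

// Illustrative values only: collection, shard, replica, and node names are made up.
static ZkNodeProps moveByReplicaName() {
  return new ZkNodeProps(
      "collection", "techproducts",     // COLLECTION_PROP
      "replica", "core_node3",          // REPLICA_PROP: move this exact replica
      "targetNode", "node2:8983_solr"); // required by ocmh.checkRequired(...)
}

static ZkNodeProps moveByFromNode() {
  return new ZkNodeProps(
      "collection", "techproducts",
      "shard", "shard1",                // SHARD_ID_PROP
      "fromNode", "node1:8983_solr",    // a random replica of shard1 on this node is chosen
      "targetNode", "node2:8983_solr");
}

Either message serializes cleanly with Utils.toJSONString(message), which is what the log.info line at the top of the method prints.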
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
The class OverseerAutoReplicaFailoverThread, method doWork.
private void doWork() {
  // TODO: extract to configurable strategy class ??
  ClusterState clusterState = zkStateReader.getClusterState();
  // check if we have disabled autoAddReplicas cluster wide
  String autoAddReplicas = zkStateReader.getClusterProperty(ZkStateReader.AUTO_ADD_REPLICAS, (String) null);
  if (autoAddReplicas != null && autoAddReplicas.equals("false")) {
    return;
  }
  if (clusterState != null) {
    if (clusterState.getZkClusterStateVersion() != null
        && clusterState.getZkClusterStateVersion().equals(lastClusterStateVersion)
        && baseUrlForBadNodes.size() == 0
        && liveNodes.equals(clusterState.getLiveNodes())) {
      // nothing has changed, no work to do
      return;
    }
    liveNodes = clusterState.getLiveNodes();
    lastClusterStateVersion = clusterState.getZkClusterStateVersion();
    Map<String, DocCollection> collections = clusterState.getCollectionsMap();
    for (Map.Entry<String, DocCollection> entry : collections.entrySet()) {
      log.debug("look at collection={}", entry.getKey());
      DocCollection docCollection = entry.getValue();
      if (!docCollection.getAutoAddReplicas()) {
        log.debug("Collection {} is not setup to use autoAddReplicas, skipping..", docCollection.getName());
        continue;
      }
      if (docCollection.getReplicationFactor() == null) {
        log.debug("Skipping collection because it has no defined replicationFactor, name={}", docCollection.getName());
        continue;
      }
      log.debug("Found collection, name={} replicationFactor={}", entry.getKey(), docCollection.getReplicationFactor());
      Collection<Slice> slices = docCollection.getSlices();
      for (Slice slice : slices) {
        if (slice.getState() == Slice.State.ACTIVE) {
          final Collection<DownReplica> downReplicas = new ArrayList<DownReplica>();
          int goodReplicas = findDownReplicasInSlice(clusterState, docCollection, slice, downReplicas);
          log.debug("collection={} replicationFactor={} goodReplicaCount={}", docCollection.getName(), docCollection.getReplicationFactor(), goodReplicas);
          if (downReplicas.size() > 0 && goodReplicas < docCollection.getReplicationFactor()) {
            // badReplicaMap.put(collection, badReplicas);
            processBadReplicas(entry.getKey(), downReplicas);
          } else if (goodReplicas > docCollection.getReplicationFactor()) {
            log.debug("There are too many replicas");
          }
        }
      }
    }
  }
}
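The gating logic (cluster-wide property, per-collection autoAddReplicas flag, defined replicationFactor, active slices) can be read in isolation. A hypothetical standalone walk over the same state, with the method name and print statement as illustrative stand-ins for processBadReplicas:

import java.util.Map;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;

// Hypothetical helper mirroring the continue-checks in doWork().
static void printAutoAddReplicaCandidates(ClusterState clusterState) {
  Map<String, DocCollection> collections = clusterState.getCollectionsMap();
  for (DocCollection docCollection : collections.values()) {
    // same two gates as doWork(): the opt-in flag and a defined replicationFactor
    if (!docCollection.getAutoAddReplicas() || docCollection.getReplicationFactor() == null) {
      continue;
    }
    for (Slice slice : docCollection.getSlices()) {
      if (slice.getState() == Slice.State.ACTIVE) {
        System.out.printf("candidate: %s/%s%n", docCollection.getName(), slice.getName());
      }
    }
  }
}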
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
The class OverseerAutoReplicaFailoverThread, method getBestCreateUrl.
/**
 * @return the best node to replace the badReplica on, or null if there is
 *         no such node
 */
static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica, Integer maxCoreCount) {
  assert badReplica != null;
  assert badReplica.collection != null;
  assert badReplica.slice != null;
  log.debug("getBestCreateUrl for {}", badReplica.replica);
  Map<String, Counts> counts = new HashMap<>();
  Set<String> unsuitableHosts = new HashSet<>();
  Set<String> liveNodes = new HashSet<>(zkStateReader.getClusterState().getLiveNodes());
  Map<String, Integer> coresPerNode = new HashMap<>();
  ClusterState clusterState = zkStateReader.getClusterState();
  if (clusterState != null) {
    Map<String, DocCollection> collections = clusterState.getCollectionsMap();
    for (Map.Entry<String, DocCollection> entry : collections.entrySet()) {
      String collection = entry.getKey();
      log.debug("look at collection {} as possible create candidate", collection);
      DocCollection docCollection = entry.getValue();
      // TODO - only operate on collections with sharedfs failover = true ??
      Collection<Slice> slices = docCollection.getSlices();
      for (Slice slice : slices) {
        // only look at active shards
        if (slice.getState() == Slice.State.ACTIVE) {
          log.debug("look at slice {} for collection {} as possible create candidate", slice.getName(), collection);
          Collection<Replica> replicas = slice.getReplicas();
          for (Replica replica : replicas) {
            liveNodes.remove(replica.getNodeName());
            String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
            if (coresPerNode.containsKey(baseUrl)) {
              Integer nodeCount = coresPerNode.get(baseUrl);
              // nodeCount++ would store the pre-increment value; store nodeCount + 1 instead
              coresPerNode.put(baseUrl, nodeCount + 1);
            } else {
              coresPerNode.put(baseUrl, 1);
            }
            if (baseUrl.equals(badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) {
              continue;
            }
            // on a live node?
            log.debug("collection={} nodename={} livenodes={}", collection, replica.getNodeName(), clusterState.getLiveNodes());
            boolean live = clusterState.liveNodesContain(replica.getNodeName());
            log.debug("collection={} look at replica {} as possible create candidate, live={}", collection, replica.getName(), live);
            if (live) {
              Counts cnt = counts.get(baseUrl);
              if (cnt == null) {
                cnt = new Counts();
              }
              if (badReplica.collection.getName().equals(collection)) {
                cnt.negRankingWeight += 3;
                cnt.collectionShardsOnNode += 1;
              } else {
                cnt.negRankingWeight += 1;
              }
              if (badReplica.collection.getName().equals(collection) && badReplica.slice.getName().equals(slice.getName())) {
                cnt.ourReplicas++;
              }
              Integer maxShardsPerNode = badReplica.collection.getMaxShardsPerNode();
              if (maxShardsPerNode == null) {
                log.warn("maxShardsPerNode is not defined for collection, name={}", badReplica.collection.getName());
                maxShardsPerNode = Integer.MAX_VALUE;
              }
              log.debug("collection={} node={} maxShardsPerNode={} maxCoresPerNode={} potential hosts={}", collection, baseUrl, maxShardsPerNode, maxCoreCount, cnt);
              Collection<Replica> badSliceReplicas = null;
              DocCollection c = clusterState.getCollection(badReplica.collection.getName());
              if (c != null) {
                Slice s = c.getSlice(badReplica.slice.getName());
                if (s != null) {
                  badSliceReplicas = s.getReplicas();
                }
              }
              boolean alreadyExistsOnNode = replicaAlreadyExistsOnNode(zkStateReader.getClusterState(), badSliceReplicas, badReplica, baseUrl);
              if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode
                  || cnt.collectionShardsOnNode >= maxShardsPerNode
                  || (maxCoreCount != null && coresPerNode.get(baseUrl) >= maxCoreCount)) {
                counts.remove(baseUrl);
                unsuitableHosts.add(baseUrl);
                log.debug("not a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt);
              } else {
                counts.put(baseUrl, cnt);
                log.debug("is a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt);
              }
            }
          }
        }
      }
    }
  }
  // live nodes that host no replicas at all are the best candidates
  for (String node : liveNodes) {
    counts.put(zkStateReader.getBaseUrlForNodeName(node), new Counts(0, 0));
  }
  if (counts.size() == 0) {
    log.debug("no suitable hosts found for getBestCreateUrl for collection={}", badReplica.collection.getName());
    return null;
  }
  ValueComparator vc = new ValueComparator(counts);
  Map<String, Counts> sortedCounts = new TreeMap<String, Counts>(vc);
  sortedCounts.putAll(counts);
  log.debug("empty nodes={} for collection={}", liveNodes, badReplica.collection.getName());
  log.debug("sorted hosts={} for collection={}", sortedCounts, badReplica.collection.getName());
  log.debug("unsuitable hosts={} for collection={}", unsuitableHosts, badReplica.collection.getName());
  return sortedCounts.keySet().iterator().next();
}
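One pattern here is worth flagging: sorting a map by value with a TreeMap collapses keys whose values compare equal, so a comparator used this way must break ties or entries silently disappear. A generic sketch of the same rank-then-take-first idiom (bestNode and the plain Integer weights are stand-ins for the private Counts/ValueComparator pair):

import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

// Stand-in for the Counts/ValueComparator ranking above; lower weight sorts first.
// Assumes a non-empty map, as the counts.size() == 0 check above guarantees.
static String bestNode(Map<String, Integer> weights) {
  Comparator<String> byWeight =
      Comparator.<String>comparingInt(weights::get)
                .thenComparing(Comparator.naturalOrder()); // tie-break keeps equal weights distinct
  Map<String, Integer> ranked = new TreeMap<>(byWeight);
  ranked.putAll(weights);
  return ranked.keySet().iterator().next(); // analogous to the return statement above
}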
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
The class OverseerCollectionMessageHandler, method waitToSeeReplicasInState.
Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
  Map<String, Replica> result = new HashMap<>();
  TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
  while (true) {
    DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName);
    for (String coreName : coreNames) {
      if (result.containsKey(coreName)) continue;
      for (Slice slice : coll.getSlices()) {
        for (Replica replica : slice.getReplicas()) {
          if (coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
            result.put(coreName, replica);
            break;
          }
        }
      }
    }
    if (result.size() == coreNames.size()) {
      return result;
    } else {
      log.debug("Expecting {} cores but found {}", coreNames.size(), result.size());
    }
    if (timeout.hasTimedOut()) {
      throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas: " + coreNames + " in cluster state.");
    }
    Thread.sleep(100);
  }
}
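The method is a poll-with-deadline loop: re-read the cluster state, collect what has appeared so far, and give up after 30 seconds. A hypothetical generic version of the same loop, assuming the same TimeOut utility the method uses (pollUntil and its parameters are illustrative):

import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import org.apache.solr.util.TimeOut;

// Hypothetical helper; returns the first non-null value the supplier produces.
static <T> T pollUntil(Supplier<T> poll, long seconds) throws InterruptedException {
  TimeOut timeout = new TimeOut(seconds, TimeUnit.SECONDS);
  while (true) {
    T value = poll.get();    // e.g. re-read zkStateReader.getClusterState() each pass
    if (value != null) {
      return value;
    }
    if (timeout.hasTimedOut()) {
      throw new IllegalStateException("timed out after " + seconds + " seconds");
    }
    Thread.sleep(100);       // same 100 ms backoff as waitToSeeReplicasInState
  }
}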
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
The class V2HttpCall, method getDocCollection.
protected DocCollection getDocCollection(String collectionName) {
  if (!cores.isZooKeeperAware()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Solr not running in cloud mode");
  }
  ZkStateReader zkStateReader = cores.getZkController().getZkStateReader();
  DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
  if (collection == null) {
    // the name may be an alias; resolve it and look up the cluster state again
    collectionName = corename = lookupAliases(collectionName);
    collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
  }
  return collection;
}
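A caller gets null back when the name is neither a collection nor a resolvable alias, so it must handle that case itself. A hypothetical caller sketch (the collection name is made up):

// Hypothetical caller; "gettingstarted" is an illustrative name.
DocCollection coll = getDocCollection("gettingstarted");
if (coll == null) {
  // neither a collection nor an alias lookupAliases could resolve
  throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
      "no such collection: gettingstarted");
}
for (Slice slice : coll.getSlices()) {
  // inspect slice.getReplicas(), slice.getState(), etc.
}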