Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
From the class AssignTest, method testAssignNode:
@Test
public void testAssignNode() throws Exception {
  String cname = "collection1";
  Map<String, DocCollection> collectionStates = new HashMap<>();
  Map<String, Slice> slices = new HashMap<>();
  Map<String, Replica> replicas = new HashMap<>();
  // Register a single active replica, core_node1, on shard1.
  ZkNodeProps m = new ZkNodeProps(
      Overseer.QUEUE_OPERATION, "state",
      ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString(),
      ZkStateReader.BASE_URL_PROP, "0.0.0.0",
      ZkStateReader.CORE_NAME_PROP, "core1",
      ZkStateReader.ROLES_PROP, null,
      ZkStateReader.NODE_NAME_PROP, "0_0_0_0",
      ZkStateReader.SHARD_ID_PROP, "shard1",
      ZkStateReader.COLLECTION_PROP, cname,
      ZkStateReader.NUM_SHARDS_PROP, "1",
      ZkStateReader.CORE_NODE_NAME_PROP, "core_node1");
  Replica replica = new Replica("core_node1", m.getProperties());
  replicas.put("core_node1", replica);
  Slice slice = new Slice("slice1", replicas, new HashMap<String, Object>(0));
  slices.put("slice1", slice);
  DocRouter router = new ImplicitDocRouter();
  DocCollection docCollection = new DocCollection(cname, slices, new HashMap<String, Object>(0), router);
  collectionStates.put(cname, docCollection);
  Set<String> liveNodes = new HashSet<>();
  ClusterState state = new ClusterState(-1, liveNodes, collectionStates);
  // With core_node1 already taken, the next assigned name should be core_node2.
  String nodeName = Assign.assignNode(state.getCollection("collection1"));
  assertEquals("core_node2", nodeName);
}
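For context on the final assertion: Assign.assignNode picks the next free core-node name from the replica names already registered in the collection. The following is a minimal sketch of that naming logic, not the exact Solr source; the class and helper names (CoreNodeNaming, nextCoreNodeName) are hypothetical. With core_node1 already taken, it yields core_node2.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;

class CoreNodeNaming {

  private static final Pattern COUNT = Pattern.compile("core_node(\\d+)");

  // Find the highest core_node<N> suffix in use and return core_node<N+1>.
  static String nextCoreNodeName(DocCollection collection) {
    int max = 0;
    for (Slice slice : collection.getSlices()) {
      for (Replica replica : slice.getReplicas()) {
        Matcher m = COUNT.matcher(replica.getName());
        if (m.matches()) {
          max = Math.max(max, Integer.parseInt(m.group(1)));
        }
      }
    }
    return "core_node" + (max + 1);
  }
}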
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
From the class BackupCmd, method call:
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  String collectionName = message.getStr(COLLECTION_PROP);
  String backupName = message.getStr(NAME);
  String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
  Instant startTime = Instant.now();
  CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer();
  BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
  BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader);
  // Backup location
  URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
  URI backupPath = repository.resolve(location, backupName);
  // Validate that the directory does not already exist.
  if (repository.exists(backupPath)) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The backup directory already exists: " + backupPath);
  }
  // Create a directory to store backup details.
  repository.createDirectory(backupPath);
  String strategy = message.getStr(CollectionAdminParams.INDEX_BACKUP_STRATEGY, CollectionAdminParams.COPY_FILES_STRATEGY);
  switch (strategy) {
    case CollectionAdminParams.COPY_FILES_STRATEGY: {
      copyIndexFiles(backupPath, message, results);
      break;
    }
    case CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY: {
      break;
    }
  }
  log.info("Starting to backup ZK data for backupName={}", backupName);
  // Download the configs.
  String configName = ocmh.zkStateReader.readConfigName(collectionName);
  backupMgr.downloadConfigDir(location, backupName, configName);
  // Save the collection's state. It can be part of the monolithic clusterstate.json or an individual state.json.
  // Since we don't want to distinguish between the two, we extract the state and back it up as a separate JSON.
  DocCollection collectionState = ocmh.zkStateReader.getClusterState().getCollection(collectionName);
  backupMgr.writeCollectionState(location, backupName, collectionName, collectionState);
  Properties properties = new Properties();
  properties.put(BackupManager.BACKUP_NAME_PROP, backupName);
  properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName);
  properties.put(COLL_CONF, configName);
  properties.put(BackupManager.START_TIME_PROP, startTime.toString());
  properties.put(BackupManager.INDEX_VERSION_PROP, Version.LATEST.toString());
  // TODO: Add an MD5 of the configset. If a configset with the same name exists during restore, we can
  // compare checksums to see whether they are the same; if not, we can throw an error or support an
  // 'overwriteConfig' flag.
  // TODO: Save numDocs for the shard leader. We can use it to sanity-check the restore.
  backupMgr.writeBackupProperties(location, backupName, properties);
  log.info("Completed backing up ZK data for backupName={}", backupName);
}
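Clients reach BackupCmd through the Collections API rather than directly. Below is a minimal SolrJ sketch of triggering the BACKUP action handled above, assuming a running SolrCloud cluster; the ZooKeeper address localhost:9983 and the /shared/backups location are placeholders for your environment.

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;

public class BackupExample {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient.Builder().withZkHost("localhost:9983").build()) {
      // Issues /admin/collections?action=BACKUP, which the Overseer routes to BackupCmd.call.
      CollectionAdminResponse rsp = CollectionAdminRequest
          .backupCollection("collection1", "myBackup")
          .setLocation("/shared/backups")
          .process(client);
      System.out.println("BACKUP returned status " + rsp.getStatus());
    }
  }
}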
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
From the class DeleteShardCmd, method call:
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
  String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP);
  String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
  log.info("Delete shard invoked");
  Slice slice = clusterState.getSlice(collectionName, sliceId);
  if (slice == null) {
    if (clusterState.hasCollection(collectionName)) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shard with name " + sliceId + " exists for collection " + collectionName);
    } else {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
    }
  }
  // For now, only allow deletion of inactive slices or custom hashes (range == null).
  // TODO: Add check for range gaps on Slice deletion
  final Slice.State state = slice.getState();
  if (!(slice.getRange() == null || state == Slice.State.INACTIVE || state == Slice.State.RECOVERY
      || state == Slice.State.CONSTRUCTION) || state == Slice.State.RECOVERY_FAILED) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The slice: " + slice.getName() + " is currently " + state + ". Only non-active (or custom-hashed) slices can be deleted.");
  }
  if (state == Slice.State.RECOVERY) {
    // Mark the slice as 'construction' and only then try to delete the cores; see SOLR-9455.
    DistributedQueue inQueue = Overseer.getStateUpdateQueue(ocmh.zkStateReader.getZkClient());
    Map<String, Object> propMap = new HashMap<>();
    propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
    propMap.put(sliceId, Slice.State.CONSTRUCTION.toString());
    propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
    ZkNodeProps m = new ZkNodeProps(propMap);
    inQueue.offer(Utils.toJSON(m));
  }
  String asyncId = message.getStr(ASYNC);
  try {
    List<ZkNodeProps> replicas = getReplicasForSlice(collectionName, slice);
    CountDownLatch cleanupLatch = new CountDownLatch(replicas.size());
    for (ZkNodeProps r : replicas) {
      final ZkNodeProps replica = r.plus(message.getProperties()).plus("parallel", "true").plus(ASYNC, asyncId);
      log.info("Deleting replica for collection={} shard={} on node={}", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(CoreAdminParams.NODE));
      NamedList deleteResult = new NamedList();
      try {
        ((DeleteReplicaCmd) ocmh.commandMap.get(DELETEREPLICA)).deleteReplica(clusterState, replica, deleteResult, () -> {
          cleanupLatch.countDown();
          if (deleteResult.get("failure") != null) {
            synchronized (results) {
              results.add("failure", String.format(Locale.ROOT, "Failed to delete replica for collection=%s shard=%s on node=%s", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(NODE_NAME_PROP)));
            }
          }
          SimpleOrderedMap success = (SimpleOrderedMap) deleteResult.get("success");
          if (success != null) {
            synchronized (results) {
              results.add("success", success);
            }
          }
        });
      } catch (KeeperException e) {
        log.warn("Error deleting replica: " + r, e);
        cleanupLatch.countDown();
      } catch (Exception e) {
        log.warn("Error deleting replica: " + r, e);
        cleanupLatch.countDown();
        throw e;
      }
    }
    log.debug("Waiting for delete shard action to complete");
    cleanupLatch.await(5, TimeUnit.MINUTES);
    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, sliceId);
    ZkStateReader zkStateReader = ocmh.zkStateReader;
    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
    // Wait for a while until we no longer see the shard.
    TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
    boolean removed = false;
    while (!timeout.hasTimedOut()) {
      Thread.sleep(100);
      DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
      removed = collection.getSlice(sliceId) == null;
      if (removed) {
        // Wait just a bit longer so it's more likely other readers see the change on return.
        Thread.sleep(100);
        break;
      }
    }
    if (!removed) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully remove collection: " + collectionName + " shard: " + sliceId);
    }
    log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId);
  } catch (SolrException e) {
    throw e;
  } catch (Exception e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collectionName + " shard: " + sliceId, e);
  }
}
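For completeness, the matching client-side call, again as a hedged SolrJ sketch with placeholder names; per the state check above, DELETESHARD is rejected unless the slice is non-active or custom-hashed (range == null).

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

public class DeleteShardExample {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient.Builder().withZkHost("localhost:9983").build()) {
      // Issues /admin/collections?action=DELETESHARD, which the Overseer routes to DeleteShardCmd.call.
      CollectionAdminRequest.deleteShard("collection1", "shard1").process(client);
    }
  }
}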
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
From the class DeleteReplicaCmd, method deleteReplicaBasedOnCount:
/**
 * Delete replicas based on count for a given collection. If a shard is passed, replicas are
 * deleted from that shard only; otherwise the given number of replicas is deleted from every
 * shard of the collection.
 */
void deleteReplicaBasedOnCount(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete, boolean parallel) throws KeeperException, InterruptedException {
  ocmh.checkRequired(message, COLLECTION_PROP, COUNT_PROP);
  int count = Integer.parseInt(message.getStr(COUNT_PROP));
  String collectionName = message.getStr(COLLECTION_PROP);
  String shard = message.getStr(SHARD_ID_PROP);
  DocCollection coll = clusterState.getCollection(collectionName);
  Slice slice = null;
  // Validate the shard if one was passed.
  if (shard != null) {
    slice = coll.getSlice(shard);
    if (slice == null) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid shard name : " + shard + " in collection : " + collectionName);
    }
  }
  Map<Slice, Set<String>> shardToReplicasMapping = new HashMap<>();
  if (slice != null) {
    Set<String> replicasToBeDeleted = pickReplicasTobeDeleted(slice, shard, collectionName, count);
    shardToReplicasMapping.put(slice, replicasToBeDeleted);
  } else {
    // No shard given: pick replicas to delete, based on count, from every slice of the collection.
    Collection<Slice> allSlices = coll.getSlices();
    for (Slice individualSlice : allSlices) {
      Set<String> replicasToBeDeleted = pickReplicasTobeDeleted(individualSlice, individualSlice.getName(), collectionName, count);
      shardToReplicasMapping.put(individualSlice, replicasToBeDeleted);
    }
  }
  for (Slice shardSlice : shardToReplicasMapping.keySet()) {
    String shardId = shardSlice.getName();
    Set<String> replicas = shardToReplicasMapping.get(shardSlice);
    // Call deleteCore on every selected replica.
    for (String replica : replicas) {
      log.debug("Deleting replica {} for shard {} based on count {}", replica, shardId, count);
      deleteCore(shardSlice, collectionName, replica, message, shard, results, onComplete, parallel);
    }
    results.add("shard_id", shardId);
    results.add("replicas_deleted", replicas);
  }
}
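The count-based deletion above backs the count variant of the DELETEREPLICA Collections API action. A minimal SolrJ sketch, assuming a running cluster, with placeholder collection and shard names:

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

public class DeleteReplicasByCountExample {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient.Builder().withZkHost("localhost:9983").build()) {
      // Delete two replicas from one named shard...
      CollectionAdminRequest.deleteReplicasFromShard("collection1", "shard1", 2).process(client);
      // ...or delete one replica from every shard of the collection.
      CollectionAdminRequest.deleteReplicasFromAllShards("collection1", 1).process(client);
    }
  }
}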
Use of org.apache.solr.common.cloud.DocCollection in project lucene-solr by apache.
From the class AddReplicaCmd, method addReplica:
ZkNodeProps addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete) throws KeeperException, InterruptedException {
  log.info("addReplica() : {}", Utils.toJSONString(message));
  String collection = message.getStr(COLLECTION_PROP);
  String node = message.getStr(CoreAdminParams.NODE);
  String shard = message.getStr(SHARD_ID_PROP);
  String coreName = message.getStr(CoreAdminParams.NAME);
  Replica.Type replicaType = Replica.Type.valueOf(message.getStr(ZkStateReader.REPLICA_TYPE, Replica.Type.NRT.name()).toUpperCase(Locale.ROOT));
  boolean parallel = message.getBool("parallel", false);
  if (StringUtils.isBlank(coreName)) {
    coreName = message.getStr(CoreAdminParams.PROPERTY_PREFIX + CoreAdminParams.NAME);
  }
  final String asyncId = message.getStr(ASYNC);
  DocCollection coll = clusterState.getCollection(collection);
  if (coll == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
  }
  if (coll.getSlice(shard) == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " shard: " + shard + " does not exist");
  }
  ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
  boolean skipCreateReplicaInClusterState = message.getBool(SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, false);
  // Kind of unnecessary, but it does put the logic of whether to override maxShardsPerNode in one place.
  if (!skipCreateReplicaInClusterState) {
    node = getNodesForNewReplicas(clusterState, collection, shard, 1, node, ocmh.overseer.getZkController().getCoreContainer()).get(0).nodeName;
  }
  log.info("Node Identified {} for creating new replica", node);
  if (!clusterState.liveNodesContain(node)) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Node: " + node + " is not live");
  }
  if (coreName == null) {
    coreName = Assign.buildCoreName(coll, shard, replicaType);
  } else if (!skipCreateReplicaInClusterState) {
    // Validate that the core name is unique within the collection.
    for (Slice slice : coll.getSlices()) {
      for (Replica replica : slice.getReplicas()) {
        String replicaCoreName = replica.getStr(CORE_NAME_PROP);
        if (coreName.equals(replicaCoreName)) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Another replica with the same core name already exists for this collection");
        }
      }
    }
  }
  ModifiableSolrParams params = new ModifiableSolrParams();
  ZkStateReader zkStateReader = ocmh.zkStateReader;
  if (!Overseer.isLegacy(zkStateReader)) {
    if (!skipCreateReplicaInClusterState) {
      ZkNodeProps props = new ZkNodeProps(
          Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(),
          ZkStateReader.COLLECTION_PROP, collection,
          ZkStateReader.SHARD_ID_PROP, shard,
          ZkStateReader.CORE_NAME_PROP, coreName,
          ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(),
          ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(node),
          ZkStateReader.NODE_NAME_PROP, node,
          ZkStateReader.REPLICA_TYPE, replicaType.name());
      Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
    }
    params.set(CoreAdminParams.CORE_NODE_NAME, ocmh.waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
  }
  String configName = zkStateReader.readConfigName(collection);
  String routeKey = message.getStr(ShardParams._ROUTE_);
  String dataDir = message.getStr(CoreAdminParams.DATA_DIR);
  String instanceDir = message.getStr(CoreAdminParams.INSTANCE_DIR);
  params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.CREATE.toString());
  params.set(CoreAdminParams.NAME, coreName);
  params.set(COLL_CONF, configName);
  params.set(CoreAdminParams.COLLECTION, collection);
  params.set(CoreAdminParams.REPLICA_TYPE, replicaType.name());
  if (shard != null) {
    params.set(CoreAdminParams.SHARD, shard);
  } else if (routeKey != null) {
    Collection<Slice> slices = coll.getRouter().getSearchSlicesSingle(routeKey, null, coll);
    if (slices.isEmpty()) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No active shard serving _route_=" + routeKey + " found");
    } else {
      params.set(CoreAdminParams.SHARD, slices.iterator().next().getName());
    }
  } else {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify either 'shard' or _route_ param");
  }
  if (dataDir != null) {
    params.set(CoreAdminParams.DATA_DIR, dataDir);
  }
  if (instanceDir != null) {
    params.set(CoreAdminParams.INSTANCE_DIR, instanceDir);
  }
  ocmh.addPropertyParams(message, params);
  // For tracking async calls.
  Map<String, String> requestMap = new HashMap<>();
  ocmh.sendShardRequest(node, params, shardHandler, asyncId, requestMap);
  final String fnode = node;
  final String fcoreName = coreName;
  Runnable runnable = () -> {
    ocmh.processResponses(results, shardHandler, true, "ADDREPLICA failed to create replica", asyncId, requestMap);
    ocmh.waitForCoreNodeName(collection, fnode, fcoreName);
    if (onComplete != null) onComplete.run();
  };
  if (!parallel) {
    runnable.run();
  } else {
    ocmh.tpe.submit(runnable);
  }
  return new ZkNodeProps(ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.SHARD_ID_PROP, shard, ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.NODE_NAME_PROP, node);
}
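Finally, the command above implements the ADDREPLICA Collections API action. A minimal SolrJ sketch, assuming a running cluster; the collection, shard, and node values are placeholders, and either a shard or a _route_ value must be supplied, as the code above enforces.

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;

public class AddReplicaExample {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient.Builder().withZkHost("localhost:9983").build()) {
      // Issues /admin/collections?action=ADDREPLICA; setNode pins the replica to a specific node.
      CollectionAdminResponse rsp = CollectionAdminRequest
          .addReplicaToShard("collection1", "shard1")
          .setNode("127.0.0.1:8983_solr")
          .process(client);
      System.out.println("ADDREPLICA returned status " + rsp.getStatus());
    }
  }
}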