use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class HttpShardHandler method prepDistributed.
@Override
public void prepDistributed(ResponseBuilder rb) {
final SolrQueryRequest req = rb.req;
final SolrParams params = req.getParams();
final String shards = params.get(ShardParams.SHARDS);
// since the cost of grabbing cloud state is still up in the air, we grab it only
// if we need it.
ClusterState clusterState = null;
Map<String, Slice> slices = null;
CoreDescriptor coreDescriptor = req.getCore().getCoreDescriptor();
CloudDescriptor cloudDescriptor = coreDescriptor.getCloudDescriptor();
ZkController zkController = req.getCore().getCoreContainer().getZkController();
final ReplicaListTransformer replicaListTransformer = httpShardHandlerFactory.getReplicaListTransformer(req);
if (shards != null) {
List<String> lst = StrUtils.splitSmart(shards, ",", true);
rb.shards = lst.toArray(new String[lst.size()]);
rb.slices = new String[rb.shards.length];
if (zkController != null) {
// figure out which shards are slices
for (int i = 0; i < rb.shards.length; i++) {
if (rb.shards[i].indexOf('/') < 0) {
// this is a logical shard
rb.slices[i] = rb.shards[i];
rb.shards[i] = null;
}
}
}
} else if (zkController != null) {
// we weren't provided with an explicit list of slices to query via "shards", so use the cluster state
clusterState = zkController.getClusterState();
String shardKeys = params.get(ShardParams._ROUTE_);
// This will be the complete list of slices we need to query for this request.
slices = new HashMap<>();
// we need to find out what collections this request is for.
// A comma-separated list of specified collections.
// Eg: "collection1,collection2,collection3"
String collections = params.get("collection");
if (collections != null) {
// If there were one or more collections specified in the query, split
// each parameter and store as a separate member of a List.
List<String> collectionList = StrUtils.splitSmart(collections, ",", true);
// cloud state and add them to the Map 'slices'.
for (String collectionName : collectionList) {
// The original code produced <collection-name>_<shard-name> when the collections
// parameter was specified (see ClientUtils.appendMap)
// Is this necessary if ony one collection is specified?
// i.e. should we change multiCollection to collectionList.size() > 1?
addSlices(slices, clusterState, params, collectionName, shardKeys, true);
}
} else {
// just this collection
String collectionName = cloudDescriptor.getCollectionName();
addSlices(slices, clusterState, params, collectionName, shardKeys, false);
}
// Store the logical slices in the ResponseBuilder and create a new
// String array to hold the physical shards (which will be mapped
// later).
rb.slices = slices.keySet().toArray(new String[slices.size()]);
rb.shards = new String[rb.slices.length];
}
//
if (zkController != null) {
// Are we hosting the shard that this request is for, and are we active? If so, then handle it ourselves
// and make it a non-distributed request.
String ourSlice = cloudDescriptor.getShardId();
String ourCollection = cloudDescriptor.getCollectionName();
// Some requests may only be fulfilled by replicas of type Replica.Type.NRT
boolean onlyNrtReplicas = Boolean.TRUE == req.getContext().get(ONLY_NRT_REPLICAS);
if (rb.slices.length == 1 && rb.slices[0] != null && // handle the <collection>_<slice> format
(rb.slices[0].equals(ourSlice) || rb.slices[0].equals(ourCollection + "_" + ourSlice)) && cloudDescriptor.getLastPublished() == Replica.State.ACTIVE && (!onlyNrtReplicas || cloudDescriptor.getReplicaType() == Replica.Type.NRT)) {
// currently just a debugging parameter to check distrib search on a single node
boolean shortCircuit = params.getBool("shortCircuit", true);
String targetHandler = params.get(ShardParams.SHARDS_QT);
// if a different handler is specified, don't short-circuit
shortCircuit = shortCircuit && targetHandler == null;
if (shortCircuit) {
rb.isDistrib = false;
rb.shortCircuitedURL = ZkCoreNodeProps.getCoreUrl(zkController.getBaseUrl(), coreDescriptor.getName());
return;
}
// We shouldn't need to do anything to handle "shard.rows" since it was previously meant to be an optimization?
}
for (int i = 0; i < rb.shards.length; i++) {
final List<String> shardUrls;
if (rb.shards[i] != null) {
shardUrls = StrUtils.splitSmart(rb.shards[i], "|", true);
replicaListTransformer.transform(shardUrls);
} else {
if (clusterState == null) {
clusterState = zkController.getClusterState();
slices = clusterState.getSlicesMap(cloudDescriptor.getCollectionName());
}
String sliceName = rb.slices[i];
Slice slice = slices.get(sliceName);
if (slice == null) {
// Treat this the same as "all servers down" for a slice, and let things continue
// if partial results are acceptable
rb.shards[i] = "";
continue;
// throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no such shard: " + sliceName);
}
final Predicate<Replica> isShardLeader = new Predicate<Replica>() {
private Replica shardLeader = null;
@Override
public boolean test(Replica replica) {
if (shardLeader == null) {
try {
shardLeader = zkController.getZkStateReader().getLeaderRetry(cloudDescriptor.getCollectionName(), slice.getName());
} catch (InterruptedException e) {
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + slice.getName() + " in collection " + cloudDescriptor.getCollectionName(), e);
} catch (SolrException e) {
if (log.isDebugEnabled()) {
log.debug("Exception finding leader for shard {} in collection {}. Collection State: {}", slice.getName(), cloudDescriptor.getCollectionName(), zkController.getZkStateReader().getClusterState().getCollectionOrNull(cloudDescriptor.getCollectionName()));
}
throw e;
}
}
return replica.getName().equals(shardLeader.getName());
}
};
final List<Replica> eligibleSliceReplicas = collectEligibleReplicas(slice, clusterState, onlyNrtReplicas, isShardLeader);
replicaListTransformer.transform(eligibleSliceReplicas);
shardUrls = new ArrayList<>(eligibleSliceReplicas.size());
for (Replica replica : eligibleSliceReplicas) {
String url = ZkCoreNodeProps.getCoreUrl(replica);
shardUrls.add(url);
}
if (shardUrls.isEmpty()) {
boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false);
if (!tolerant) {
// stop the check when there are no replicas available for a shard
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "no servers hosting shard: " + rb.slices[i]);
}
}
}
// And now recreate the | delimited list of equivalent servers
rb.shards[i] = createSliceShardsStr(shardUrls);
}
}
String shards_rows = params.get(ShardParams.SHARDS_ROWS);
if (shards_rows != null) {
rb.shards_rows = Integer.parseInt(shards_rows);
}
String shards_start = params.get(ShardParams.SHARDS_START);
if (shards_start != null) {
rb.shards_start = Integer.parseInt(shards_start);
}
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class SplitOp method execute.
@Override
public void execute(CoreAdminHandler.CallInfo it) throws Exception {
SolrParams params = it.req.getParams();
List<DocRouter.Range> ranges = null;
String[] pathsArr = params.getParams(PATH);
// ranges=a-b,c-d,e-f
String rangesStr = params.get(CoreAdminParams.RANGES);
if (rangesStr != null) {
String[] rangesArr = rangesStr.split(",");
if (rangesArr.length == 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "There must be at least one range specified to split an index");
} else {
ranges = new ArrayList<>(rangesArr.length);
for (String r : rangesArr) {
try {
ranges.add(DocRouter.DEFAULT.fromString(r));
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Exception parsing hexadecimal hash range: " + r, e);
}
}
}
}
String splitKey = params.get("split.key");
String[] newCoreNames = params.getParams("targetCore");
String cname = params.get(CoreAdminParams.CORE, "");
if ((pathsArr == null || pathsArr.length == 0) && (newCoreNames == null || newCoreNames.length == 0)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Either path or targetCore param must be specified");
}
log.info("Invoked split action for core: " + cname);
SolrCore core = it.handler.coreContainer.getCore(cname);
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
List<SolrCore> newCores = null;
try {
// TODO: allow use of rangesStr in the future
List<String> paths = null;
int partitions = pathsArr != null ? pathsArr.length : newCoreNames.length;
DocRouter router = null;
String routeFieldName = null;
if (it.handler.coreContainer.isZooKeeperAware()) {
ClusterState clusterState = it.handler.coreContainer.getZkController().getClusterState();
String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName();
DocCollection collection = clusterState.getCollection(collectionName);
String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
Slice slice = collection.getSlice(sliceName);
router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
if (ranges == null) {
DocRouter.Range currentRange = slice.getRange();
ranges = currentRange != null ? router.partitionRange(partitions, currentRange) : null;
}
// for back-compat with Solr 4.4
Object routerObj = collection.get(DOC_ROUTER);
if (routerObj != null && routerObj instanceof Map) {
Map routerProps = (Map) routerObj;
routeFieldName = (String) routerProps.get("field");
}
}
if (pathsArr == null) {
newCores = new ArrayList<>(partitions);
for (String newCoreName : newCoreNames) {
SolrCore newcore = it.handler.coreContainer.getCore(newCoreName);
if (newcore != null) {
newCores.add(newcore);
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Core with core name " + newCoreName + " expected but doesn't exist.");
}
}
} else {
paths = Arrays.asList(pathsArr);
}
SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router, routeFieldName, splitKey);
core.getUpdateHandler().split(cmd);
// After the split has completed, someone (here?) should start the process of replaying the buffered updates.
} catch (Exception e) {
log.error("ERROR executing split:", e);
throw new RuntimeException(e);
} finally {
if (req != null)
req.close();
if (core != null)
core.close();
if (newCores != null) {
for (SolrCore newCore : newCores) {
newCore.close();
}
}
}
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class PrepRecoveryOp method execute.
@Override
public void execute(CallInfo it) throws Exception {
assert TestInjection.injectPrepRecoveryOpPauseForever();
final SolrParams params = it.req.getParams();
String cname = params.get(CoreAdminParams.CORE);
if (cname == null) {
cname = "";
}
String nodeName = params.get("nodeName");
String coreNodeName = params.get("coreNodeName");
Replica.State waitForState = Replica.State.getState(params.get(ZkStateReader.STATE_PROP));
Boolean checkLive = params.getBool("checkLive");
Boolean onlyIfLeader = params.getBool("onlyIfLeader");
Boolean onlyIfLeaderActive = params.getBool("onlyIfLeaderActive");
CoreContainer coreContainer = it.handler.coreContainer;
// wait long enough for the leader conflict to work itself out plus a little extra
int conflictWaitMs = coreContainer.getZkController().getLeaderConflictResolveWait();
int maxTries = (int) Math.round(conflictWaitMs / 1000) + 3;
log.info("Going to wait for coreNodeName: {}, state: {}, checkLive: {}, onlyIfLeader: {}, onlyIfLeaderActive: {}, maxTime: {} s", coreNodeName, waitForState, checkLive, onlyIfLeader, onlyIfLeaderActive, maxTries);
Replica.State state = null;
boolean live = false;
int retry = 0;
while (true) {
try (SolrCore core = coreContainer.getCore(cname)) {
if (core == null && retry == Math.min(30, maxTries)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:" + cname);
}
if (core != null) {
if (onlyIfLeader != null && onlyIfLeader) {
if (!core.getCoreDescriptor().getCloudDescriptor().isLeader()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "We are not the leader");
}
}
// wait until we are sure the recovering node is ready
// to accept updates
CloudDescriptor cloudDescriptor = core.getCoreDescriptor().getCloudDescriptor();
String collectionName = cloudDescriptor.getCollectionName();
if (retry % 15 == 0) {
if (retry > 0 && log.isInfoEnabled())
log.info("After " + retry + " seconds, core " + cname + " (" + cloudDescriptor.getShardId() + " of " + cloudDescriptor.getCollectionName() + ") still does not have state: " + waitForState + "; forcing ClusterState update from ZooKeeper");
// force a cluster state update
coreContainer.getZkController().getZkStateReader().forceUpdateCollection(collectionName);
}
ClusterState clusterState = coreContainer.getZkController().getClusterState();
DocCollection collection = clusterState.getCollection(collectionName);
Slice slice = collection.getSlice(cloudDescriptor.getShardId());
if (slice != null) {
final Replica replica = slice.getReplicasMap().get(coreNodeName);
if (replica != null) {
state = replica.getState();
live = clusterState.liveNodesContain(nodeName);
final Replica.State localState = cloudDescriptor.getLastPublished();
// TODO: This is funky but I've seen this in testing where the replica asks the
// leader to be in recovery? Need to track down how that happens ... in the meantime,
// this is a safeguard
boolean leaderDoesNotNeedRecovery = (onlyIfLeader != null && onlyIfLeader && core.getName().equals(replica.getStr("core")) && waitForState == Replica.State.RECOVERING && localState == Replica.State.ACTIVE && state == Replica.State.ACTIVE);
if (leaderDoesNotNeedRecovery) {
log.warn("Leader " + core.getName() + " ignoring request to be in the recovering state because it is live and active.");
}
boolean onlyIfActiveCheckResult = onlyIfLeaderActive != null && onlyIfLeaderActive && localState != Replica.State.ACTIVE;
log.info("In WaitForState(" + waitForState + "): collection=" + collectionName + ", shard=" + slice.getName() + ", thisCore=" + core.getName() + ", leaderDoesNotNeedRecovery=" + leaderDoesNotNeedRecovery + ", isLeader? " + core.getCoreDescriptor().getCloudDescriptor().isLeader() + ", live=" + live + ", checkLive=" + checkLive + ", currentState=" + state.toString() + ", localState=" + localState + ", nodeName=" + nodeName + ", coreNodeName=" + coreNodeName + ", onlyIfActiveCheckResult=" + onlyIfActiveCheckResult + ", nodeProps: " + replica);
if (!onlyIfActiveCheckResult && replica != null && (state == waitForState || leaderDoesNotNeedRecovery)) {
if (checkLive == null) {
break;
} else if (checkLive && live) {
break;
} else if (!checkLive && !live) {
break;
}
}
}
}
}
if (retry++ == maxTries) {
String collection = null;
String leaderInfo = null;
String shardId = null;
try {
CloudDescriptor cloudDescriptor = core.getCoreDescriptor().getCloudDescriptor();
collection = cloudDescriptor.getCollectionName();
shardId = cloudDescriptor.getShardId();
leaderInfo = coreContainer.getZkController().getZkStateReader().getLeaderUrl(collection, shardId, 5000);
} catch (Exception exc) {
leaderInfo = "Not available due to: " + exc;
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "I was asked to wait on state " + waitForState + " for " + shardId + " in " + collection + " on " + nodeName + " but I still do not see the requested state. I see state: " + Objects.toString(state) + " live:" + live + " leader from ZK: " + leaderInfo);
}
if (coreContainer.isShutDown()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Solr is shutting down");
}
// solrcloud_debug
if (log.isDebugEnabled() && core != null) {
try {
LocalSolrQueryRequest r = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
CommitUpdateCommand commitCmd = new CommitUpdateCommand(r, false);
commitCmd.softCommit = true;
core.getUpdateHandler().commit(commitCmd);
RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
SolrIndexSearcher searcher = searchHolder.get();
try {
log.debug(core.getCoreContainer().getZkController().getNodeName() + " to replicate " + searcher.search(new MatchAllDocsQuery(), 1).totalHits + " gen:" + core.getDeletionPolicy().getLatestCommit().getGeneration() + " data:" + core.getDataDir());
} finally {
searchHolder.decref();
}
} catch (Exception e) {
log.debug("Error in solrcloud_debug block", e);
}
}
}
Thread.sleep(1000);
}
log.info("Waited coreNodeName: " + coreNodeName + ", state: " + waitForState + ", checkLive: " + checkLive + ", onlyIfLeader: " + onlyIfLeader + " for: " + retry + " seconds.");
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class RebalanceLeaders method execute.
void execute() throws KeeperException, InterruptedException {
req.getParams().required().check(COLLECTION_PROP);
String collectionName = req.getParams().get(COLLECTION_PROP);
if (StringUtils.isBlank(collectionName)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format(Locale.ROOT, "The " + COLLECTION_PROP + " is required for the Rebalance Leaders command."));
}
coreContainer.getZkController().getZkStateReader().forceUpdateCollection(collectionName);
ClusterState clusterState = coreContainer.getZkController().getClusterState();
DocCollection dc = clusterState.getCollection(collectionName);
if (dc == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' does not exist, no action taken.");
}
Map<String, String> currentRequests = new HashMap<>();
int max = req.getParams().getInt(MAX_AT_ONCE_PROP, Integer.MAX_VALUE);
if (max <= 0)
max = Integer.MAX_VALUE;
int maxWaitSecs = req.getParams().getInt(MAX_WAIT_SECONDS_PROP, 60);
NamedList<Object> results = new NamedList<>();
boolean keepGoing = true;
for (Slice slice : dc.getSlices()) {
ensurePreferredIsLeader(results, slice, currentRequests);
if (currentRequests.size() == max) {
log.info("Queued " + max + " leader reassignments, waiting for some to complete.");
keepGoing = waitForLeaderChange(currentRequests, maxWaitSecs, false, results);
if (keepGoing == false) {
// If we've waited longer than specified, don't continue to wait!
break;
}
}
}
if (keepGoing == true) {
keepGoing = waitForLeaderChange(currentRequests, maxWaitSecs, true, results);
}
if (keepGoing == true) {
log.info("All leader reassignments completed.");
} else {
log.warn("Exceeded specified timeout of ." + maxWaitSecs + "' all leaders may not have been reassigned");
}
rsp.getValues().addAll(results);
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class ManagedIndexSchema method getActiveReplicaCoreUrls.
protected static List<String> getActiveReplicaCoreUrls(ZkController zkController, String collection, String localCoreNodeName) {
List<String> activeReplicaCoreUrls = new ArrayList<>();
ZkStateReader zkStateReader = zkController.getZkStateReader();
ClusterState clusterState = zkStateReader.getClusterState();
Set<String> liveNodes = clusterState.getLiveNodes();
Collection<Slice> activeSlices = clusterState.getActiveSlices(collection);
if (activeSlices != null && activeSlices.size() > 0) {
for (Slice next : activeSlices) {
Map<String, Replica> replicasMap = next.getReplicasMap();
if (replicasMap != null) {
for (Map.Entry<String, Replica> entry : replicasMap.entrySet()) {
Replica replica = entry.getValue();
if (!localCoreNodeName.equals(replica.getName()) && replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName())) {
ZkCoreNodeProps replicaCoreProps = new ZkCoreNodeProps(replica);
activeReplicaCoreUrls.add(replicaCoreProps.getCoreUrl());
}
}
}
}
}
return activeReplicaCoreUrls;
}
Aggregations