Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class OverseerTest, method testPerformance.
@Test
@Ignore
public void testPerformance() throws Exception {
String zkDir = createTempDir("OverseerTest.testPerformance").toFile().getAbsolutePath();
ZkTestServer server = new ZkTestServer(zkDir);
SolrZkClient controllerClient = null;
SolrZkClient overseerClient = null;
ZkStateReader reader = null;
MockZKController mockController = null;
try {
server.run();
controllerClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
ZkController.createClusterZkNodes(controllerClient);
reader = new ZkStateReader(controllerClient);
reader.createClusterStateWatchersAndUpdate();
mockController = new MockZKController(server.getZkAddress(), "node1");
final int MAX_COLLECTIONS = 10, MAX_CORES = 10, MAX_STATE_CHANGES = 20000, STATE_FORMAT = 2;
for (int i = 0; i < MAX_COLLECTIONS; i++) {
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(), "name", "perf" + i, ZkStateReader.NUM_SHARDS_PROP, "1", "stateFormat", String.valueOf(STATE_FORMAT), ZkStateReader.REPLICATION_FACTOR, "1", ZkStateReader.MAX_SHARDS_PER_NODE, "1");
DistributedQueue q = Overseer.getStateUpdateQueue(controllerClient);
q.offer(Utils.toJSON(m));
controllerClient.makePath("/collections/perf" + i, true);
}
for (int i = 0, j = 0, k = 0; i < MAX_STATE_CHANGES; i++, j++, k++) {
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString(), ZkStateReader.NODE_NAME_PROP, "node1", ZkStateReader.CORE_NAME_PROP, "core" + k, ZkStateReader.CORE_NODE_NAME_PROP, "node1", ZkStateReader.COLLECTION_PROP, "perf" + j, ZkStateReader.NUM_SHARDS_PROP, "1", ZkStateReader.BASE_URL_PROP, "http://" + "node1" + "/solr/");
DistributedQueue q = Overseer.getStateUpdateQueue(controllerClient);
q.offer(Utils.toJSON(m));
if (j >= MAX_COLLECTIONS - 1)
j = 0;
if (k >= MAX_CORES - 1)
k = 0;
if (i > 0 && i % 100 == 0)
log.info("Published {} items", i);
}
// let's publish a sentinel collection which we'll use to wait for overseer to complete operations
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString(), ZkStateReader.NODE_NAME_PROP, "node1", ZkStateReader.CORE_NAME_PROP, "core1", ZkStateReader.CORE_NODE_NAME_PROP, "node1", ZkStateReader.COLLECTION_PROP, "perf_sentinel", ZkStateReader.NUM_SHARDS_PROP, "1", ZkStateReader.BASE_URL_PROP, "http://" + "node1" + "/solr/");
DistributedQueue q = Overseer.getStateUpdateQueue(controllerClient);
q.offer(Utils.toJSON(m));
Timer t = new Timer();
Timer.Context context = t.time();
try {
overseerClient = electNewOverseer(server.getZkAddress());
assertTrue(overseers.size() > 0);
while (true) {
ClusterState state = reader.getClusterState();
if (state.hasCollection("perf_sentinel")) {
break;
}
Thread.sleep(1000);
}
} finally {
context.stop();
}
log.info("Overseer loop finished processing: ");
printTimingStats(t);
Overseer overseer = overseers.get(0);
Overseer.Stats stats = overseer.getStats();
String[] interestingOps = { "state", "update_state", "am_i_leader", "" };
Arrays.sort(interestingOps);
for (Map.Entry<String, Overseer.Stat> entry : stats.getStats().entrySet()) {
String op = entry.getKey();
if (Arrays.binarySearch(interestingOps, op) < 0)
continue;
Overseer.Stat stat = entry.getValue();
log.info("op: {}, success: {}, failure: {}", op, stat.success.get(), stat.errors.get());
Timer timer = stat.requestTime;
printTimingStats(timer);
}
} finally {
close(overseerClient);
close(mockController);
close(controllerClient);
close(reader);
server.shutdown();
}
}
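The ZkNodeProps pattern this test exercises over and over is: build the message from alternating key/value varargs, serialize it with Utils.toJSON, and offer the bytes onto the Overseer state-update queue. Below is a minimal sketch of that pattern, assuming this snapshot's package layout; the class name, method name, and the zkClient/collectionName parameters are illustrative, not taken from the test.

import org.apache.solr.cloud.DistributedQueue;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.util.Utils;

class StateUpdateSketch {
    // Queue a CREATE message for a single-shard collection, mirroring the loop above.
    static void queueCreate(SolrZkClient zkClient, String collectionName) throws Exception {
        ZkNodeProps m = new ZkNodeProps(
            Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(),
            "name", collectionName,
            ZkStateReader.NUM_SHARDS_PROP, "1",
            ZkStateReader.REPLICATION_FACTOR, "1",
            ZkStateReader.MAX_SHARDS_PER_NODE, "1");
        // each message is stored as JSON bytes; the Overseer consumes and applies them in order
        DistributedQueue q = Overseer.getStateUpdateQueue(zkClient);
        q.offer(Utils.toJSON(m));
    }
}

The test then measures how quickly the Overseer drains 20,000 such messages, using the "perf_sentinel" collection as a marker that all earlier messages have been processed.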
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class OverseerTest, method testRemovalOfLastReplica.
@Test
public void testRemovalOfLastReplica() throws Exception {
// between 1 and 4 replicas
final Integer numReplicas = 1 + random().nextInt(4);
// between 1 and 4 shards
final Integer numShards = 1 + random().nextInt(4);
final String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
final ZkTestServer server = new ZkTestServer(zkDir);
SolrZkClient zkClient = null;
ZkStateReader zkStateReader = null;
SolrZkClient overseerClient = null;
try {
server.run();
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
ZkController.createClusterZkNodes(zkClient);
zkStateReader = new ZkStateReader(zkClient);
zkStateReader.createClusterStateWatchersAndUpdate();
overseerClient = electNewOverseer(server.getZkAddress());
DistributedQueue q = Overseer.getStateUpdateQueue(zkClient);
// create collection
{
final Integer maxShardsPerNode = numReplicas * numShards;
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(), "name", COLLECTION, ZkStateReader.NUM_SHARDS_PROP, numShards.toString(), ZkStateReader.REPLICATION_FACTOR, "1", ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode.toString());
q.offer(Utils.toJSON(m));
}
waitForCollections(zkStateReader, COLLECTION);
// create nodes with state recovering
for (int rr = 1; rr <= numReplicas; ++rr) {
for (int ss = 1; ss <= numShards; ++ss) {
final int N = (numReplicas - rr) * numShards + ss;
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr", ZkStateReader.SHARD_ID_PROP, "shard" + ss, ZkStateReader.NODE_NAME_PROP, "node" + N, ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NAME_PROP, "core" + N, ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
q.offer(Utils.toJSON(m));
}
}
// verify recovering
for (int rr = 1; rr <= numReplicas; ++rr) {
for (int ss = 1; ss <= numShards; ++ss) {
final int N = (numReplicas - rr) * numShards + ss;
verifyReplicaStatus(zkStateReader, COLLECTION, "shard" + ss, "core_node" + N, Replica.State.RECOVERING);
}
}
// publish node states (active)
for (int rr = 1; rr <= numReplicas; ++rr) {
for (int ss = 1; ss <= numShards; ++ss) {
final int N = (numReplicas - rr) * numShards + ss;
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr", ZkStateReader.NODE_NAME_PROP, "node" + N, ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NAME_PROP, "core" + N, ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString());
q.offer(Utils.toJSON(m));
}
}
// verify active
for (int rr = 1; rr <= numReplicas; ++rr) {
for (int ss = 1; ss <= numShards; ++ss) {
final int N = (numReplicas - rr) * numShards + ss;
verifyReplicaStatus(zkStateReader, COLLECTION, "shard" + ss, "core_node" + N, Replica.State.ACTIVE);
}
}
// delete node
for (int rr = 1; rr <= numReplicas; ++rr) {
for (int ss = 1; ss <= numShards; ++ss) {
final int N = (numReplicas - rr) * numShards + ss;
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(), ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NODE_NAME_PROP, "core_node" + N);
q.offer(Utils.toJSON(m));
{
int iterationsLeft = 100;
while (iterationsLeft-- > 0) {
final Slice slice = zkStateReader.getClusterState().getSlice(COLLECTION, "shard" + ss);
if (null == slice || null == slice.getReplicasMap().get("core_node" + N)) {
break;
}
if (VERBOSE)
log.info("still seeing {} shard{} core_node{}, rechecking in 50ms ({} iterations left)", COLLECTION, ss, N, iterationsLeft);
Thread.sleep(50);
}
}
final DocCollection docCollection = zkStateReader.getClusterState().getCollection(COLLECTION);
assertTrue("found no " + COLLECTION, (null != docCollection));
final Slice slice = docCollection.getSlice("shard" + ss);
assertTrue("found no " + COLLECTION + " shard" + ss + " slice after removal of replica " + rr + " of " + numReplicas, (null != slice));
final Collection<Replica> replicas = slice.getReplicas();
assertEquals("wrong number of " + COLLECTION + " shard" + ss + " replicas left, replicas=" + replicas, numReplicas - rr, replicas.size());
}
}
} finally {
close(overseerClient);
close(zkStateReader);
close(zkClient);
server.shutdown();
}
}
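Beyond the CREATE and STATE messages, the step that gives this test its name is the DELETECORE message that removes each replica, including the last one in a slice. A minimal sketch of just that message follows, assuming the same package layout; the helper name and parameters are illustrative.

import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils;

class DeleteCoreSketch {
    // Ask the Overseer to drop one replica from the cluster state, as the delete loop above does.
    static void queueDeleteCore(SolrZkClient zkClient, String collection, String coreNodeName) throws Exception {
        ZkNodeProps m = new ZkNodeProps(
            Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
            ZkStateReader.COLLECTION_PROP, collection,
            ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
        Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m));
    }
}

Because the Overseer applies the message asynchronously, the test polls the cluster state until the replica disappears from the slice, rechecking every 50ms with a bounded iteration count.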
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class LeaderElectionTest, method testBasic.
@Test
public void testBasic() throws Exception {
LeaderElector elector = new LeaderElector(zkClient);
ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr/", ZkStateReader.CORE_NAME_PROP, "");
ElectionContext context = new ShardLeaderElectionContextBase(elector, "shard2", "collection1", "dummynode1", props, zkStateReader);
elector.setup(context);
elector.joinElection(context, false);
assertEquals("http://127.0.0.1/solr/", getLeaderUrl("collection1", "shard2"));
}
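ZkNodeProps itself is a thin, effectively read-only wrapper around a Map<String, Object>, built here from alternating key/value varargs. A minimal sketch of the API the election context relies on is shown below, with illustrative values; the round trip through Utils.toJSON and ZkNodeProps.load mirrors how these props are written to and read back from ZooKeeper.

import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils;

class ZkNodePropsSketch {
    static void roundTrip() {
        // alternating key/value varargs, as in the election context above
        ZkNodeProps props = new ZkNodeProps(
            ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr/",
            ZkStateReader.CORE_NAME_PROP, "collection1_shard2_replica1");
        String baseUrl = props.getStr(ZkStateReader.BASE_URL_PROP);
        // serialized to JSON bytes for ZooKeeper, then rebuilt with load()
        byte[] bytes = Utils.toJSON(props);
        ZkNodeProps restored = ZkNodeProps.load(bytes);
        assert baseUrl.equals(restored.getStr(ZkStateReader.BASE_URL_PROP));
    }
}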
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class MigrateCmd, method migrateKey.
private void migrateKey(ClusterState clusterState, DocCollection sourceCollection, Slice sourceSlice, DocCollection targetCollection, Slice targetSlice, String splitKey, int timeout, NamedList results, String asyncId, ZkNodeProps message) throws Exception {
String tempSourceCollectionName = "split_" + sourceSlice.getName() + "_temp_" + targetSlice.getName();
ZkStateReader zkStateReader = ocmh.zkStateReader;
if (clusterState.hasCollection(tempSourceCollectionName)) {
log.info("Deleting temporary collection: " + tempSourceCollectionName);
Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
try {
ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
clusterState = zkStateReader.getClusterState();
} catch (Exception e) {
log.warn("Unable to clean up existing temporary collection: " + tempSourceCollectionName, e);
}
}
CompositeIdRouter sourceRouter = (CompositeIdRouter) sourceCollection.getRouter();
DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey);
ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory;
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange);
// intersect source range, keyHashRange and target range
// this is the range that has to be split from source and transferred to target
DocRouter.Range splitRange = ocmh.intersect(targetSlice.getRange(), ocmh.intersect(sourceSlice.getRange(), keyHashRange));
if (splitRange == null) {
log.info("No common hashes between source shard: {} and target shard: {}", sourceSlice.getName(), targetSlice.getName());
return;
}
log.info("Common hash range between source shard: {} and target shard: {} = " + splitRange, sourceSlice.getName(), targetSlice.getName());
Replica targetLeader = zkStateReader.getLeaderRetry(targetCollection.getName(), targetSlice.getName(), 10000);
// For tracking async calls.
Map<String, String> requestMap = new HashMap<>();
log.info("Asking target leader node: " + targetLeader.getNodeName() + " core: " + targetLeader.getStr("core") + " to buffer updates");
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTBUFFERUPDATES.toString());
params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to buffer updates", asyncId, requestMap);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.ADDROUTINGRULE.toLower(), COLLECTION_PROP, sourceCollection.getName(), SHARD_ID_PROP, sourceSlice.getName(), "routeKey", SolrIndexSplitter.getRouteKey(splitKey) + "!", "range", splitRange.toString(), "targetCollection", targetCollection.getName(), "expireAt", RoutingRule.makeExpiryAt(timeout));
log.info("Adding routing rule: " + m);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
// wait for a while until we see the new rule
log.info("Waiting to see routing rule updated in clusterstate");
TimeOut waitUntil = new TimeOut(60, TimeUnit.SECONDS);
boolean added = false;
while (!waitUntil.hasTimedOut()) {
Thread.sleep(100);
sourceCollection = zkStateReader.getClusterState().getCollection(sourceCollection.getName());
sourceSlice = sourceCollection.getSlice(sourceSlice.getName());
Map<String, RoutingRule> rules = sourceSlice.getRoutingRules();
if (rules != null) {
RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + "!");
if (rule != null && rule.getRouteRanges().contains(splitRange)) {
added = true;
break;
}
}
}
if (!added) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not add routing rule: " + m);
}
log.info("Routing rule added successfully");
// Create temp core on source shard
Replica sourceLeader = zkStateReader.getLeaderRetry(sourceCollection.getName(), sourceSlice.getName(), 10000);
// create a temporary collection with just one node on the shard leader
String configName = zkStateReader.readConfigName(sourceCollection.getName());
Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, CREATE.toLower(), NAME, tempSourceCollectionName, NRT_REPLICAS, 1, NUM_SLICES, 1, COLL_CONF, configName, CREATE_NODE_SET, sourceLeader.getNodeName());
if (asyncId != null) {
String internalAsyncId = asyncId + Math.abs(System.nanoTime());
props.put(ASYNC, internalAsyncId);
}
log.info("Creating temporary collection: " + props);
ocmh.commandMap.get(CREATE).call(clusterState, new ZkNodeProps(props), results);
// refresh cluster state
clusterState = zkStateReader.getClusterState();
Slice tempSourceSlice = clusterState.getCollection(tempSourceCollectionName).getSlices().iterator().next();
Replica tempSourceLeader = zkStateReader.getLeaderRetry(tempSourceCollectionName, tempSourceSlice.getName(), 120000);
String tempCollectionReplica1 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 1);
String coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, sourceLeader.getNodeName(), tempCollectionReplica1);
// wait for the replicas to be seen as active on temp source leader
log.info("Asking source leader to wait for: " + tempCollectionReplica1 + " to be alive on: " + sourceLeader.getNodeName());
CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
cmd.setCoreName(tempCollectionReplica1);
cmd.setNodeName(sourceLeader.getNodeName());
cmd.setCoreNodeName(coreNodeName);
cmd.setState(Replica.State.ACTIVE);
cmd.setCheckLive(true);
cmd.setOnlyIfLeader(true);
// we don't want this to happen asynchronously
ocmh.sendShardRequest(tempSourceLeader.getNodeName(), new ModifiableSolrParams(cmd.getParams()), shardHandler, null, null);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection leader" + " or timed out waiting for it to come up", asyncId, requestMap);
log.info("Asking source leader to split index");
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
params.set(CoreAdminParams.CORE, sourceLeader.getStr("core"));
params.add(CoreAdminParams.TARGET_CORE, tempSourceLeader.getStr("core"));
params.set(CoreAdminParams.RANGES, splitRange.toString());
params.set("split.key", splitKey);
String tempNodeName = sourceLeader.getNodeName();
ocmh.sendShardRequest(tempNodeName, params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to invoke SPLIT core admin command", asyncId, requestMap);
log.info("Creating a replica of temporary collection: {} on the target leader node: {}", tempSourceCollectionName, targetLeader.getNodeName());
String tempCollectionReplica2 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 2);
props = new HashMap<>();
props.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
props.put(COLLECTION_PROP, tempSourceCollectionName);
props.put(SHARD_ID_PROP, tempSourceSlice.getName());
props.put("node", targetLeader.getNodeName());
props.put(CoreAdminParams.NAME, tempCollectionReplica2);
// copy over property params:
for (String key : message.keySet()) {
if (key.startsWith(COLL_PROP_PREFIX)) {
props.put(key, message.getStr(key));
}
}
// add async param
if (asyncId != null) {
props.put(ASYNC, asyncId);
}
((AddReplicaCmd) ocmh.commandMap.get(ADDREPLICA)).addReplica(clusterState, new ZkNodeProps(props), results, null);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create replica of " + "temporary collection in target leader node.", asyncId, requestMap);
coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, targetLeader.getNodeName(), tempCollectionReplica2);
// wait for the replicas to be seen as active on temp source leader
log.info("Asking temp source leader to wait for: " + tempCollectionReplica2 + " to be alive on: " + targetLeader.getNodeName());
cmd = new CoreAdminRequest.WaitForState();
cmd.setCoreName(tempSourceLeader.getStr("core"));
cmd.setNodeName(targetLeader.getNodeName());
cmd.setCoreNodeName(coreNodeName);
cmd.setState(Replica.State.ACTIVE);
cmd.setCheckLive(true);
cmd.setOnlyIfLeader(true);
params = new ModifiableSolrParams(cmd.getParams());
ocmh.sendShardRequest(tempSourceLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection" + " replica or timed out waiting for them to come up", asyncId, requestMap);
log.info("Successfully created replica of temp source collection on target leader node");
log.info("Requesting merge of temp source collection replica to target leader");
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.MERGEINDEXES.toString());
params.set(CoreAdminParams.CORE, targetLeader.getStr("core"));
params.set(CoreAdminParams.SRC_CORE, tempCollectionReplica2);
ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
String msg = "MIGRATE failed to merge " + tempCollectionReplica2 + " to " + targetLeader.getStr("core") + " on node: " + targetLeader.getNodeName();
ocmh.processResponses(results, shardHandler, true, msg, asyncId, requestMap);
log.info("Asking target leader to apply buffered updates");
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to apply buffered updates", asyncId, requestMap);
try {
log.info("Deleting temporary collection: " + tempSourceCollectionName);
props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
} catch (Exception e) {
log.error("Unable to delete temporary collection: " + tempSourceCollectionName + ". Please remove it manually", e);
}
}
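migrateKey builds its ZkNodeProps messages two ways: directly from varargs (the ADDROUTINGRULE message) and by assembling a Map first, which is convenient when keys such as ASYNC are optional, then wrapping the Map in ZkNodeProps when invoking another collection command. A minimal sketch of the second pattern follows; the DELETE/NAME/ASYNC constants are replaced by their string literals for brevity, and the class and method names are invented for illustration.

import static org.apache.solr.common.util.Utils.makeMap;

import java.util.Map;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.common.cloud.ZkNodeProps;

class CommandMessageSketch {
    // Build a delete-collection message the way migrateKey does for its temp collection:
    // fill a Map first (easy to add optional keys), then wrap it for the command call.
    static ZkNodeProps deleteMessage(String tempCollectionName, String asyncId) {
        Map<String, Object> props = makeMap(
            Overseer.QUEUE_OPERATION, "delete",
            "name", tempCollectionName);
        if (asyncId != null) {
            props.put("async", asyncId);
        }
        return new ZkNodeProps(props);
    }
}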
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class RecoveryStrategy, method doSyncOrReplicateRecovery.
// TODO: perhaps make this grab a new core each time through the loop to handle core reloads?
public final void doSyncOrReplicateRecovery(SolrCore core) throws KeeperException, InterruptedException {
boolean replayed = false;
boolean successfulRecovery = false;
UpdateLog ulog;
ulog = core.getUpdateHandler().getUpdateLog();
if (ulog == null) {
SolrException.log(LOG, "No UpdateLog found - cannot recover.");
recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
return;
}
// we temporarily ignore peersync for tlog replicas
boolean firstTime = replicaType != Replica.Type.TLOG;
List<Long> recentVersions;
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
recentVersions = recentUpdates.getVersions(ulog.getNumRecordsToKeep());
} catch (Exception e) {
SolrException.log(LOG, "Corrupt tlog - ignoring.", e);
recentVersions = new ArrayList<>(0);
}
List<Long> startingVersions = ulog.getStartingVersions();
if (startingVersions != null && recoveringAfterStartup) {
try {
// index of the start of the old list in the current list
int oldIdx = 0;
long firstStartingVersion = startingVersions.size() > 0 ? startingVersions.get(0) : 0;
for (; oldIdx < recentVersions.size(); oldIdx++) {
if (recentVersions.get(oldIdx) == firstStartingVersion)
break;
}
if (oldIdx > 0) {
LOG.info("####### Found new versions added after startup: num=[{}]", oldIdx);
LOG.info("###### currentVersions=[{}]", recentVersions);
}
LOG.info("###### startupVersions=[{}]", startingVersions);
} catch (Exception e) {
SolrException.log(LOG, "Error getting recent versions.", e);
recentVersions = new ArrayList<>(0);
}
}
if (recoveringAfterStartup) {
// if we're recovering after startup (i.e. we have been down), then we need to know what the last versions were
// when we went down. We may have received updates since then.
recentVersions = startingVersions;
try {
if ((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0) {
// last operation at the time of startup had the GAP flag set...
// this means we were previously doing a full index replication
// that probably didn't complete and buffering updates in the
// meantime.
LOG.info("Looks like a previous replication recovery did not complete - skipping peer sync.");
// skip peersync
firstTime = false;
}
} catch (Exception e) {
SolrException.log(LOG, "Error trying to get ulog starting operation.", e);
// skip peersync
firstTime = false;
}
}
if (replicaType == Replica.Type.TLOG) {
zkController.stopReplicationFromLeader(coreName);
}
Future<RecoveryInfo> replayFuture = null;
while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) {
// don't use interruption or it will close channels though
try {
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
ZkNodeProps leaderprops = zkStateReader.getLeaderRetry(cloudDesc.getCollectionName(), cloudDesc.getShardId());
final String leaderBaseUrl = leaderprops.getStr(ZkStateReader.BASE_URL_PROP);
final String leaderCoreName = leaderprops.getStr(ZkStateReader.CORE_NAME_PROP);
String leaderUrl = ZkCoreNodeProps.getCoreUrl(leaderBaseUrl, leaderCoreName);
String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
boolean isLeader = leaderUrl.equals(ourUrl);
if (isLeader && !cloudDesc.isLeader()) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader.");
}
if (cloudDesc.isLeader()) {
// we are now the leader - no one else must have been suitable
LOG.warn("We have not yet recovered - but we are now the leader!");
LOG.info("Finished recovery process.");
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
return;
}
LOG.info("Begin buffering updates. core=[{}]", coreName);
ulog.bufferUpdates();
replayed = false;
LOG.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl, ourUrl);
zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING);
final Slice slice = zkStateReader.getClusterState().getSlice(cloudDesc.getCollectionName(), cloudDesc.getShardId());
try {
prevSendPreRecoveryHttpUriRequest.abort();
} catch (NullPointerException e) {
// okay
}
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
sendPrepRecoveryCmd(leaderBaseUrl, leaderCoreName, slice);
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
// wait a bit so that updates the leader sent before it saw our recovering state have finished
// (see the discussion around the current value of waitForUpdatesWithStaleStatePauseMilliSeconds)
try {
Thread.sleep(waitForUpdatesWithStaleStatePauseMilliSeconds);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
// first thing we just try to sync
if (firstTime) {
// only try sync the first time through the loop
firstTime = false;
LOG.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leaderUrl, recoveringAfterStartup);
// System.out.println("Attempting to PeerSync from " + leaderUrl
// + " i am:" + zkController.getNodeName());
PeerSync peerSync = new PeerSync(core, Collections.singletonList(leaderUrl), ulog.getNumRecordsToKeep(), false, false);
peerSync.setStartingVersions(recentVersions);
boolean syncSuccess = peerSync.sync().isSuccess();
if (syncSuccess) {
SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
// force open a new searcher
core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
LOG.info("PeerSync stage of recovery was successful.");
// solrcloud_debug
cloudDebugLog(core, "synced");
LOG.info("Replaying updates buffered during PeerSync.");
replay(core);
replayed = true;
// sync success
successfulRecovery = true;
return;
}
LOG.info("PeerSync Recovery was not successful - trying replication.");
}
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
LOG.info("Starting Replication Recovery.");
try {
replicate(zkController.getNodeName(), core, leaderprops);
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
replayFuture = replay(core);
replayed = true;
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
LOG.info("Replication Recovery was successful.");
successfulRecovery = true;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.warn("Recovery was interrupted", e);
close = true;
} catch (Exception e) {
SolrException.log(LOG, "Error while trying to recover", e);
}
} catch (Exception e) {
SolrException.log(LOG, "Error while trying to recover. core=" + coreName, e);
} finally {
if (!replayed) {
// dropBufferedUpdate()s currently only supports returning to ACTIVE state, which risks additional updates
// being added w/o UpdateLog.FLAG_GAP, hence losing the info on restart that we are not up-to-date.
// For now, ulog will simply remain in BUFFERING state, and an additional call to bufferUpdates() will
// reset our starting point for playback.
LOG.info("Replay not started, or was not successful... still buffering updates.");
/** this prev code is retained in case we want to switch strategies.
try {
ulog.dropBufferedUpdates();
} catch (Exception e) {
SolrException.log(log, "", e);
}
**/
}
if (successfulRecovery) {
LOG.info("Registering as Active after recovery.");
try {
if (replicaType == Replica.Type.TLOG) {
zkController.startReplicationFromLeader(coreName, true);
}
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
} catch (Exception e) {
LOG.error("Could not publish as ACTIVE after succesful recovery", e);
successfulRecovery = false;
}
if (successfulRecovery) {
close = true;
recoveryListener.recovered();
}
}
}
if (!successfulRecovery) {
// Or do a fall off retry...
try {
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
LOG.error("Recovery failed - trying again... (" + retries + ")");
retries++;
if (retries >= maxRetries) {
SolrException.log(LOG, "Recovery failed - max retries exceeded (" + retries + ").");
try {
recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
} catch (Exception e) {
SolrException.log(LOG, "Could not publish that recovery failed", e);
}
break;
}
} catch (Exception e) {
SolrException.log(LOG, "An error has occurred during recovery", e);
}
try {
// Wait an exponential interval between retries, start at 5 seconds and work up to a minute.
// If we're at attempt >= 4, there's no point computing pow(2, retries) because the result
// will always be the minimum of the two (12). Since we sleep at 5 seconds sub-intervals in
// order to check if we were closed, 12 is chosen as the maximum loopCount (5s * 12 = 1m).
double loopCount = retries < 4 ? Math.min(Math.pow(2, retries), 12) : 12;
LOG.info("Wait [{}] seconds before trying to recover again (attempt={})", loopCount, retries);
for (int i = 0; i < loopCount; i++) {
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
// check if someone closed us
break;
}
Thread.sleep(startingRecoveryDelayMilliSeconds);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.warn("Recovery was interrupted.", e);
close = true;
}
}
}
// if replay was skipped (e.g. because a full index was pulled from the leader), we still need to update version bucket seeds after recovery
if (successfulRecovery && replayFuture == null) {
LOG.info("Updating version bucket highest from index after successful recovery.");
core.seedVersionBuckets();
}
LOG.info("Finished recovery process, successful=[{}]", Boolean.toString(successfulRecovery));
}
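In this method ZkNodeProps appears on the read side: getLeaderRetry hands back the leader's properties, and the recovery loop pulls the base URL and core name out of them to build the leader's core URL. A minimal sketch of that lookup, assuming this snapshot's API; the class and method names are illustrative.

import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;

class LeaderUrlSketch {
    // Derive the leader's core URL the same way the recovery loop above does.
    static String leaderCoreUrl(ZkStateReader zkStateReader, String collection, String shardId)
            throws InterruptedException {
        ZkNodeProps leaderProps = zkStateReader.getLeaderRetry(collection, shardId);
        String leaderBaseUrl = leaderProps.getStr(ZkStateReader.BASE_URL_PROP);
        String leaderCoreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
        return ZkCoreNodeProps.getCoreUrl(leaderBaseUrl, leaderCoreName);
    }
}

Comparing this URL with the local core's URL is how the loop decides whether it has unexpectedly become the leader while trying to recover.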