Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class RecoveryStrategy, method doSyncOrReplicateRecovery.
// TODO: perhaps make this grab a new core each time through the loop to handle core reloads?
public final void doSyncOrReplicateRecovery(SolrCore core) throws KeeperException, InterruptedException {
boolean replayed = false;
boolean successfulRecovery = false;
UpdateLog ulog;
ulog = core.getUpdateHandler().getUpdateLog();
if (ulog == null) {
SolrException.log(LOG, "No UpdateLog found - cannot recover.");
recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
return;
}
// we temporarily ignore PeerSync for TLOG replicas
boolean firstTime = replicaType != Replica.Type.TLOG;
List<Long> recentVersions;
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
recentVersions = recentUpdates.getVersions(ulog.getNumRecordsToKeep());
} catch (Exception e) {
SolrException.log(LOG, "Corrupt tlog - ignoring.", e);
recentVersions = new ArrayList<>(0);
}
List<Long> startingVersions = ulog.getStartingVersions();
if (startingVersions != null && recoveringAfterStartup) {
try {
// index of the start of the old list in the current list
int oldIdx = 0;
long firstStartingVersion = startingVersions.size() > 0 ? startingVersions.get(0) : 0;
for (; oldIdx < recentVersions.size(); oldIdx++) {
if (recentVersions.get(oldIdx) == firstStartingVersion)
break;
}
if (oldIdx > 0) {
LOG.info("####### Found new versions added after startup: num=[{}]", oldIdx);
LOG.info("###### currentVersions=[{}]", recentVersions);
}
LOG.info("###### startupVersions=[{}]", startingVersions);
} catch (Exception e) {
SolrException.log(LOG, "Error getting recent versions.", e);
recentVersions = new ArrayList<>(0);
}
}
if (recoveringAfterStartup) {
// if we're recovering after startup (i.e. we have been down), then we need to know what the last versions were
// when we went down. We may have received updates since then.
recentVersions = startingVersions;
try {
if ((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0) {
// last operation at the time of startup had the GAP flag set...
// this means we were previously doing a full index replication
// that probably didn't complete and buffering updates in the
// meantime.
LOG.info("Looks like a previous replication recovery did not complete - skipping peer sync.");
// skip peersync
firstTime = false;
}
} catch (Exception e) {
SolrException.log(LOG, "Error trying to get ulog starting operation.", e);
// skip peersync
firstTime = false;
}
}
if (replicaType == Replica.Type.TLOG) {
zkController.stopReplicationFromLeader(coreName);
}
Future<RecoveryInfo> replayFuture = null;
while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) {
// don't use interruption or it will close channels though
try {
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
ZkNodeProps leaderprops = zkStateReader.getLeaderRetry(cloudDesc.getCollectionName(), cloudDesc.getShardId());
final String leaderBaseUrl = leaderprops.getStr(ZkStateReader.BASE_URL_PROP);
final String leaderCoreName = leaderprops.getStr(ZkStateReader.CORE_NAME_PROP);
String leaderUrl = ZkCoreNodeProps.getCoreUrl(leaderBaseUrl, leaderCoreName);
String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
boolean isLeader = leaderUrl.equals(ourUrl);
if (isLeader && !cloudDesc.isLeader()) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader.");
}
if (cloudDesc.isLeader()) {
// we are now the leader - no one else must have been suitable
LOG.warn("We have not yet recovered - but we are now the leader!");
LOG.info("Finished recovery process.");
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
return;
}
LOG.info("Begin buffering updates. core=[{}]", coreName);
ulog.bufferUpdates();
replayed = false;
LOG.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl, ourUrl);
zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING);
final Slice slice = zkStateReader.getClusterState().getSlice(cloudDesc.getCollectionName(), cloudDesc.getShardId());
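// cancel any prep-recovery request still outstanding from a previous attempt (a NullPointerException below just means there was none)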
try {
prevSendPreRecoveryHttpUriRequest.abort();
} catch (NullPointerException e) {
// okay
}
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
sendPrepRecoveryCmd(leaderBaseUrl, leaderCoreName, slice);
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
// wait a bit so that any updates on the leader that started before it saw our RECOVERING state are sure to have finished (see SOLR-7141 for discussion around the current value)
try {
Thread.sleep(waitForUpdatesWithStaleStatePauseMilliSeconds);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
// first thing we just try to sync
if (firstTime) {
// only try sync the first time through the loop
firstTime = false;
LOG.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leaderUrl, recoveringAfterStartup);
// System.out.println("Attempting to PeerSync from " + leaderUrl
// + " i am:" + zkController.getNodeName());
PeerSync peerSync = new PeerSync(core, Collections.singletonList(leaderUrl), ulog.getNumRecordsToKeep(), false, false);
peerSync.setStartingVersions(recentVersions);
boolean syncSuccess = peerSync.sync().isSuccess();
if (syncSuccess) {
SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
// force open a new searcher
core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
LOG.info("PeerSync stage of recovery was successful.");
// solrcloud_debug
cloudDebugLog(core, "synced");
LOG.info("Replaying updates buffered during PeerSync.");
replay(core);
replayed = true;
// sync success
successfulRecovery = true;
return;
}
LOG.info("PeerSync Recovery was not successful - trying replication.");
}
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
LOG.info("Starting Replication Recovery.");
try {
replicate(zkController.getNodeName(), core, leaderprops);
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
replayFuture = replay(core);
replayed = true;
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
LOG.info("Replication Recovery was successful.");
successfulRecovery = true;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.warn("Recovery was interrupted", e);
close = true;
} catch (Exception e) {
SolrException.log(LOG, "Error while trying to recover", e);
}
} catch (Exception e) {
SolrException.log(LOG, "Error while trying to recover. core=" + coreName, e);
} finally {
if (!replayed) {
// dropBufferedUpdate()s currently only supports returning to ACTIVE state, which risks additional updates
// being added w/o UpdateLog.FLAG_GAP, hence losing the info on restart that we are not up-to-date.
// For now, ulog will simply remain in BUFFERING state, and an additional call to bufferUpdates() will
// reset our starting point for playback.
LOG.info("Replay not started, or was not successful... still buffering updates.");
/** this prev code is retained in case we want to switch strategies.
try {
ulog.dropBufferedUpdates();
} catch (Exception e) {
SolrException.log(log, "", e);
}
**/
}
if (successfulRecovery) {
LOG.info("Registering as Active after recovery.");
try {
if (replicaType == Replica.Type.TLOG) {
zkController.startReplicationFromLeader(coreName, true);
}
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
} catch (Exception e) {
LOG.error("Could not publish as ACTIVE after succesful recovery", e);
successfulRecovery = false;
}
if (successfulRecovery) {
close = true;
recoveryListener.recovered();
}
}
}
if (!successfulRecovery) {
// Or do a fall off retry...
try {
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
break;
}
LOG.error("Recovery failed - trying again... (" + retries + ")");
retries++;
if (retries >= maxRetries) {
SolrException.log(LOG, "Recovery failed - max retries exceeded (" + retries + ").");
try {
recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
} catch (Exception e) {
SolrException.log(LOG, "Could not publish that recovery failed", e);
}
break;
}
} catch (Exception e) {
SolrException.log(LOG, "An error has occurred during recovery", e);
}
try {
// Wait an exponential interval between retries, start at 5 seconds and work up to a minute.
// If we're at attempt >= 4, there's no point computing pow(2, retries) because the result
// will always be the minimum of the two (12). Since we sleep at 5 seconds sub-intervals in
// order to check if we were closed, 12 is chosen as the maximum loopCount (5s * 12 = 1m).
double loopCount = retries < 4 ? Math.min(Math.pow(2, retries), 12) : 12;
LOG.info("Wait [{}] seconds before trying to recover again (attempt={})", loopCount, retries);
for (int i = 0; i < loopCount; i++) {
if (isClosed()) {
LOG.info("RecoveryStrategy has been closed");
// check if someone closed us
break;
}
Thread.sleep(startingRecoveryDelayMilliSeconds);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.warn("Recovery was interrupted.", e);
close = true;
}
}
}
// if replay was skipped (possibly due to pulling a full index from the leader), then we still need to update version bucket seeds after recovery
if (successfulRecovery && replayFuture == null) {
LOG.info("Updating version bucket highest from index after successful recovery.");
core.seedVersionBuckets();
}
LOG.info("Finished recovery process, successful=[{}]", Boolean.toString(successfulRecovery));
}
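The core of this method is UpdateLog's buffer-then-replay cycle. Below is a minimal sketch of that cycle, assuming the UpdateLog buffering API (bufferUpdates / applyBufferedUpdates), which the replay(core) helper above presumably wraps; bufferThenReplay is a hypothetical helper, not part of RecoveryStrategy.
import java.util.concurrent.Future;
import org.apache.solr.core.SolrCore;
import org.apache.solr.update.UpdateLog;
// Hypothetical helper, not part of RecoveryStrategy.
static void bufferThenReplay(SolrCore core) throws Exception {
  UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
  // switch the tlog into BUFFERING so incoming updates are recorded but not applied
  ulog.bufferUpdates();
  // ... PeerSync with the leader or replicate its index here ...
  // apply whatever was buffered while we were syncing/replicating
  Future<UpdateLog.RecoveryInfo> replay = ulog.applyBufferedUpdates();
  if (replay != null) {
    // null means nothing was buffered; otherwise block until replay finishes
    replay.get();
  }
}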
Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class CdcrRequestHandler, method handleShardCheckpointAction.
/**
* Retrieve the version number of the latest entry of the {@link org.apache.solr.update.UpdateLog}.
*/
private void handleShardCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) {
if (!leaderStateManager.amILeader()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Action '" + CdcrParams.CdcrAction.SHARDCHECKPOINT + "' sent to non-leader replica");
}
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
VersionInfo versionInfo = ulog.getVersionInfo();
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
long maxVersionFromRecent = recentUpdates.getMaxRecentVersion();
long maxVersionFromIndex = versionInfo.getMaxVersionFromIndex(req.getSearcher());
log.info("Found maxVersionFromRecent {} maxVersionFromIndex {}", maxVersionFromRecent, maxVersionFromIndex);
// there is no race with ongoing bootstrap because we don't expect any updates to come from the source
long maxVersion = Math.max(maxVersionFromIndex, maxVersionFromRecent);
if (maxVersion == 0L) {
maxVersion = -1;
}
rsp.add(CdcrParams.CHECKPOINT, maxVersion);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Action '" + CdcrParams.CdcrAction.SHARDCHECKPOINT + "' could not read max version");
}
}
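The RecentUpdates pattern used above can be isolated into a small helper. A sketch, assuming only the API already shown (getRecentUpdates, getMaxRecentVersion); the try-with-resources is what releases the underlying tlog references, and latestTlogVersion is a hypothetical name.
import java.io.IOException;
import org.apache.solr.update.UpdateLog;
// Hypothetical helper, not part of CdcrRequestHandler.
static long latestTlogVersion(UpdateLog ulog) throws IOException {
  try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
    // 0 when the tlog holds no versions yet; the handler above maps that case to -1
    return recentUpdates.getMaxRecentVersion();
  }
}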
Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class ZkController, method publish.
/**
* Publish core state to overseer.
*/
public void publish(final CoreDescriptor cd, final Replica.State state, boolean updateLastState, boolean forcePublish) throws KeeperException, InterruptedException {
if (!forcePublish) {
try (SolrCore core = cc.getCore(cd.getName())) {
if (core == null || core.isClosed()) {
return;
}
MDCLoggingContext.setCore(core);
}
} else {
MDCLoggingContext.setCoreDescriptor(cc, cd);
}
try {
String collection = cd.getCloudDescriptor().getCollectionName();
log.debug("publishing state={}", state.toString());
// System.out.println(Thread.currentThread().getStackTrace()[3]);
Integer numShards = cd.getCloudDescriptor().getNumShards();
if (numShards == null) {
// XXX sys prop hack
log.debug("numShards not found on descriptor - reading it from system property");
numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
}
assert collection != null && collection.length() > 0;
String shardId = cd.getCloudDescriptor().getShardId();
String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
// If the leader initiated recovery, then verify that this replica has performed recovery as requested before becoming active; don't even look at lirState if going down
if (state != Replica.State.DOWN) {
final Replica.State lirState = getLeaderInitiatedRecoveryState(collection, shardId, coreNodeName);
if (lirState != null) {
assert cd.getCloudDescriptor().getReplicaType() != Replica.Type.PULL : "LIR should not happen for pull replicas!";
if (state == Replica.State.ACTIVE) {
// trying to become active, so leader-initiated state must be recovering
if (lirState == Replica.State.RECOVERING) {
updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, Replica.State.ACTIVE, cd, true);
} else if (lirState == Replica.State.DOWN) {
throw new SolrException(ErrorCode.INVALID_STATE, "Cannot publish state of core '" + cd.getName() + "' as active without recovering first!");
}
} else if (state == Replica.State.RECOVERING) {
// if it is currently DOWN, then trying to enter into recovering state is good
if (lirState == Replica.State.DOWN) {
updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, Replica.State.RECOVERING, cd, true);
}
}
}
}
Map<String, Object> props = new HashMap<>();
props.put(Overseer.QUEUE_OPERATION, "state");
props.put(ZkStateReader.STATE_PROP, state.toString());
props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl());
props.put(ZkStateReader.CORE_NAME_PROP, cd.getName());
props.put(ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles());
props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
props.put(ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId());
props.put(ZkStateReader.COLLECTION_PROP, collection);
props.put(ZkStateReader.REPLICA_TYPE, cd.getCloudDescriptor().getReplicaType().toString());
if (numShards != null) {
props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString());
}
if (coreNodeName != null) {
props.put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
}
try (SolrCore core = cc.getCore(cd.getName())) {
if (core != null && core.getDirectoryFactory().isSharedStorage()) {
props.put("dataDir", core.getDataDir());
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (ulog != null) {
props.put("ulogDir", ulog.getLogDir());
}
}
}
} catch (SolrCoreInitializationException ex) {
// The core had failed to initialize (in a previous request, not this one), hence nothing to do here.
log.info("The core '{}' had failed to initialize before.", cd.getName());
}
ZkNodeProps m = new ZkNodeProps(props);
if (updateLastState) {
cd.getCloudDescriptor().lastPublished = state;
}
overseerJobQueue.offer(Utils.toJSON(m));
} finally {
MDCLoggingContext.clear();
}
}
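Of the properties published here, only dataDir and ulogDir depend on the core being open and backed by shared storage. A null-safe sketch of just that branch, using only the calls shown above; addSharedStorageProps is a hypothetical standalone helper, not ZkController code.
import java.util.Map;
import org.apache.solr.core.SolrCore;
import org.apache.solr.update.UpdateLog;
// Hypothetical helper, not part of ZkController.
static void addSharedStorageProps(SolrCore core, Map<String, Object> props) {
  if (core == null || !core.getDirectoryFactory().isSharedStorage()) {
    return; // nothing extra to publish for local storage
  }
  props.put("dataDir", core.getDataDir());
  UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
  if (ulog != null) { // the update log can be disabled in solrconfig.xml
    props.put("ulogDir", ulog.getLogDir());
  }
}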
Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class ZkController, method register.
/**
* Register shard with ZooKeeper.
*
* @return the shardId for the SolrCore
*/
public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, boolean afterExpiration, boolean skipRecovery) throws Exception {
try (SolrCore core = cc.getCore(desc.getName())) {
MDCLoggingContext.setCore(core);
}
try {
// pre register has published our down state
final String baseUrl = getBaseUrl();
final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
final String collection = cloudDesc.getCollectionName();
final String coreZkNodeName = desc.getCloudDescriptor().getCoreNodeName();
assert coreZkNodeName != null : "we should have a coreNodeName by now";
String shardId = cloudDesc.getShardId();
Map<String, Object> props = new HashMap<>();
// we only put a subset of props into the leader node
props.put(ZkStateReader.BASE_URL_PROP, baseUrl);
props.put(ZkStateReader.CORE_NAME_PROP, coreName);
props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
log.debug("Register replica - core:{} address:{} collection:{} shard:{}", coreName, baseUrl, cloudDesc.getCollectionName(), shardId);
ZkNodeProps leaderProps = new ZkNodeProps(props);
try {
// If we're a preferred leader, insert ourselves at the head of the queue
boolean joinAtHead = false;
Replica replica = zkStateReader.getClusterState().getReplica(collection, coreZkNodeName);
if (replica != null) {
joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
}
// TODO: Why would replica be null?
if (replica == null || replica.getType() != Type.PULL) {
joinElection(desc, afterExpiration, joinAtHead);
} else if (replica.getType() == Type.PULL) {
if (joinAtHead) {
log.warn("Replica {} was designated as preferred leader but it's type is {}, It won't join election", coreZkNodeName, Type.PULL);
}
log.debug("Replica {} skipping election because it's type is {}", coreZkNodeName, Type.PULL);
startReplicationFromLeader(coreName, false);
}
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (KeeperException | IOException e) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
// in this case, we want to wait for the leader as long as the leader might
// wait for a vote, at least - but also long enough that a large cluster has
// time to get its act together
String leaderUrl = getLeader(cloudDesc, leaderVoteWait + 600000);
String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
log.debug("We are " + ourUrl + " and leader is " + leaderUrl);
boolean isLeader = leaderUrl.equals(ourUrl);
Replica.Type replicaType = zkStateReader.getClusterState().getCollection(collection).getReplica(coreZkNodeName).getType();
assert !(isLeader && replicaType == Type.PULL) : "Pull replica became leader!";
try (SolrCore core = cc.getCore(desc.getName())) {
// recover from local transaction log and wait for it to complete before
// going active
// TODO: should this be moved to another thread? To recoveryStrat?
// TODO: should this actually be done earlier, before (or as part of)
// leader election perhaps?
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
boolean isTlogReplicaAndNotLeader = replicaType == Replica.Type.TLOG && !isLeader;
if (isTlogReplicaAndNotLeader) {
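// bring the current tlog up to the replica's last commit point before replication from the leader begins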
String commitVersion = ReplicateFromLeader.getCommitVersion(core);
if (commitVersion != null) {
ulog.copyOverOldUpdates(Long.parseLong(commitVersion));
}
}
// we will call register again after zk expiration and on reload
if (!afterExpiration && !core.isReloaded() && ulog != null && !isTlogReplicaAndNotLeader) {
// disable recovery in case shard is in construction state (for shard splits)
Slice slice = getClusterState().getSlice(collection, shardId);
if (slice.getState() != Slice.State.CONSTRUCTION || !isLeader) {
Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler().getUpdateLog().recoverFromLog();
if (recoveryFuture != null) {
log.info("Replaying tlog for " + ourUrl + " during startup... NOTE: This can take a while.");
// NOTE: this could potentially block for minutes or more!
recoveryFuture.get();
// TODO: publish as recovering in the meantime?
// TODO: in the future we could do peersync in parallel with recoverFromLog
} else {
log.debug("No LogReplay needed for core={} baseURL={}", core.getName(), baseUrl);
}
}
}
boolean didRecovery = checkRecovery(recoverReloadedCores, isLeader, skipRecovery, collection, coreZkNodeName, core, cc, afterExpiration);
if (!didRecovery) {
if (isTlogReplicaAndNotLeader) {
startReplicationFromLeader(coreName, true);
}
publish(desc, Replica.State.ACTIVE);
}
core.getCoreDescriptor().getCloudDescriptor().setHasRegistered(true);
}
// make sure we have an updated cluster state right away
zkStateReader.forceUpdateCollection(collection);
return shardId;
} finally {
MDCLoggingContext.clear();
}
}
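The startup replay step above reduces to the recoverFromLog contract shown in the snippet: null when there is nothing to replay, otherwise a future to wait on. A minimal sketch under that assumption; replayTlogOnStartup is a hypothetical helper, not ZkController code.
import java.util.concurrent.Future;
import org.apache.solr.core.SolrCore;
import org.apache.solr.update.UpdateLog;
// Hypothetical helper, not part of ZkController.
static void replayTlogOnStartup(SolrCore core) throws Exception {
  UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
  if (ulog == null) {
    return; // update log not configured, nothing to replay
  }
  Future<UpdateLog.RecoveryInfo> future = ulog.recoverFromLog();
  if (future != null) {
    future.get(); // may block for minutes on a large transaction log
  }
}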