use of org.apache.solr.update.VersionInfo in project lucene-solr by apache.
the class ReplicationHandler method getMaxVersion.
/**
 * Reports the highest document version stored in the given commit point.
 * <p>
 * A reader is opened on the commit for the duration of the lookup and closed again before
 * returning. The value obtained from the core's {@link VersionInfo} is returned as an
 * absolute (non-negative) number.
 * </p>
 *
 * @param commit the index commit to inspect
 * @return the absolute value of the maximum version found in {@code commit}
 * @throws IOException if the commit cannot be opened or read
 */
private long getMaxVersion(IndexCommit commit) throws IOException {
  try (DirectoryReader commitReader = DirectoryReader.open(commit)) {
    final IndexSearcher commitSearcher = new IndexSearcher(commitReader);
    final VersionInfo versionInfo = core.getUpdateHandler().getUpdateLog().getVersionInfo();
    // the sign is dropped: callers only get the magnitude of the version
    return Math.abs(versionInfo.getMaxVersionFromIndex(commitSearcher));
  }
}
use of org.apache.solr.update.VersionInfo in project lucene-solr by apache.
the class IndexFetcher method moveTlogFiles.
/**
 * <p>
 * Installs the tlog files found in the temporary tlog directory as the core's tlog files and
 * resets the {@link UpdateLog} accordingly. The copy step tries to keep the original tlog
 * directory intact when it fails.
 * </p>
 * <p>
 * The tlog files fetched from the leader are assumed to be in sync with the index files fetched
 * from the leader. Resetting the update log relies on the version of the latest operations
 * present in those tlog files; if the tlogs are ahead of the latest commit point, not all of the
 * buffered updates needed for replay are copied and some operations will be missed.
 * </p>
 * <p>
 * Updates are blocked for the whole duration of the operation and are always unblocked again
 * before returning.
 * </p>
 *
 * @param tmpTlogDir the temporary directory holding the transferred tlog files
 * @return {@code true} if the files were moved and the update log was re-initialised;
 *         {@code false} if the copy failed or any exception was raised (the exception is logged)
 */
private boolean moveTlogFiles(File tmpTlogDir) {
  final UpdateLog updateLog = solrCore.getUpdateHandler().getUpdateLog();
  final VersionInfo versionInfo = updateLog.getVersionInfo();

  boolean moved = false;
  // no update may come in until the new update log has been initialised
  versionInfo.blockUpdates();
  try {
    final CdcrUpdateLog cdcrLog = (CdcrUpdateLog) updateLog;
    // the update log must be reset before the new tlog directory is copied in
    final CdcrUpdateLog.BufferedUpdates buffered = cdcrLog.resetForRecovery();

    // only carry on if the temp tlog files could be moved into the tlog directory
    if (copyTmpTlogFiles2Tlog(tmpTlogDir)) {
      if (buffered.tlog != null) {
        // the buffered tlog now lives in the backup directory: re-point the file path there
        final File logDirParent = FileSystems.getDefault().getPath(solrCore.getUpdateHandler().getUpdateLog().getLogDir()).getParent().toFile();
        final File backupDir = new File(logDirParent, tmpTlogDir.getName());
        buffered.tlog = new File(backupDir, buffered.tlog.getName());
      }
      // bring the update log up on the new tlog files and copy over the buffered updates
      cdcrLog.initForRecovery(buffered.tlog, buffered.offset);
      moved = true;
    }
  } catch (Exception e) {
    LOG.error("Unable to copy tlog files", e);
  } finally {
    versionInfo.unblockUpdates();
  }
  return moved;
}
use of org.apache.solr.update.VersionInfo in project lucene-solr by apache.
the class TestStressRecovery method testStressRecovery.
/**
 * Stress test that runs writer and reader threads concurrently while the main thread repeatedly
 * puts the update log into buffering mode and then applies the buffered updates.
 * <p>
 * This version simulates updates coming from the leader and sometimes being reordered,
 * and tests the ability to buffer updates and apply them later.
 * </p>
 * <p>
 * Writers issue commits, adds, deletes and delete-by-queries; the non-commit updates carry an
 * explicit, test-generated version and are flagged with {@code DISTRIB_UPDATE_PARAM=FROM_LEADER}.
 * Readers query a document either through {@code /get} or through a regular query and compare
 * the version and value they find against the corresponding model.
 * </p>
 *
 * @throws Exception if the test is interrupted or any harness call fails
 */
@Test
public void testStressRecovery() throws Exception {
assumeFalse("FIXME: This test is horribly slow sometimes on Windows!", Constants.WINDOWS);
// start from an empty, committed index
clearIndex();
assertU(commit());
final int commitPercent = 5 + random().nextInt(10);
// what percent of the commits are soft
final int softCommitPercent = 30 + random().nextInt(75);
final int deletePercent = 4 + random().nextInt(25);
final int deleteByQueryPercent = random().nextInt(5);
final int ndocs = 5 + (random().nextBoolean() ? random().nextInt(25) : random().nextInt(200));
// fewer write threads to give recovery thread more of a chance
int nWriteThreads = 2 + random().nextInt(10);
// the commit limit equals the number of writer threads
final int maxConcurrentCommits = nWriteThreads;
// query variables
final int percentRealtimeQuery = 75;
final int percentGetLatestVersions = random().nextInt(4);
// number of recovery loops to perform
// (also the shared stop signal: worker threads exit once it is <= 0, and set it to -1 on failure)
final AtomicLong operations = new AtomicLong(atLeast(100));
// fewer read threads to give writers more of a chance
int nReadThreads = 2 + random().nextInt(10);
initModel(ndocs);
final AtomicInteger numCommitting = new AtomicInteger();
List<Thread> threads = new ArrayList<>();
// source of the versions the writers put on their updates
final AtomicLong testVersion = new AtomicLong(0);
final UpdateHandler uHandler = h.getCore().getUpdateHandler();
final UpdateLog uLog = uHandler.getUpdateLog();
// NOTE(review): vInfo is not referenced again in this method
final VersionInfo vInfo = uLog.getVersionInfo();
// guards update log state changes (bufferUpdates) against concurrent commits by the writers
final Object stateChangeLock = new Object();
this.visibleModel = model;
// one semaphore per writer, created with the maximum number of permits;
// the main loop drains them to idle individual writers while buffered updates are applied
final Semaphore[] writePermissions = new Semaphore[nWriteThreads];
for (int i = 0; i < nWriteThreads; i++) writePermissions[i] = new Semaphore(Integer.MAX_VALUE, false);
final Semaphore readPermission = new Semaphore(Integer.MAX_VALUE, false);
// ---- writer threads ----
for (int i = 0; i < nWriteThreads; i++) {
final int threadNum = i;
Thread thread = new Thread("WRITER" + i) {
// per-thread random, seeded from the test's random
Random rand = new Random(random().nextInt());
Semaphore writePermission = writePermissions[threadNum];
@Override
public void run() {
try {
while (operations.get() > 0) {
// blocks while the main thread has drained this writer's permits
writePermission.acquire();
// NOTE(review): oper is drawn from [0, 10) but compared against the *Percent thresholds below;
// since commitPercent is at least 5, at least half of the iterations take the commit branch,
// and all of them do when commitPercent >= 10 - confirm this is intended
int oper = rand.nextInt(10);
if (oper < commitPercent) {
if (numCommitting.incrementAndGet() <= maxConcurrentCommits) {
Map<Integer, DocInfo> newCommittedModel;
long version;
synchronized (globalLock) {
// take a snapshot
newCommittedModel = new HashMap<>(model);
version = snapshotCount++;
}
synchronized (stateChangeLock) {
// if the update log is not ACTIVE when we commit,
// change the version to -1 so we won't update our model.
if (uLog.getState() != UpdateLog.State.ACTIVE)
version = -1;
if (rand.nextInt(100) < softCommitPercent) {
verbose("softCommit start");
assertU(TestHarness.commit("softCommit", "true"));
verbose("softCommit end");
} else {
verbose("hardCommit start");
assertU(commit());
verbose("hardCommit end");
}
}
synchronized (globalLock) {
// install this model only if we are not in recovery mode.
if (version >= committedModelClock) {
if (VERBOSE) {
verbose("installing new committedModel version=" + committedModelClock);
}
committedModel = newCommittedModel;
committedModelClock = version;
}
}
}
numCommitting.decrementAndGet();
// a commit iteration does no document update
continue;
}
int id;
if (rand.nextBoolean()) {
id = rand.nextInt(ndocs);
} else {
// reuse the last ID half of the time to force more race conditions
id = lastId;
}
// set the lastId before we actually change it sometimes to try and
// uncover more race conditions between writing and reading
boolean before = rand.nextBoolean();
if (before) {
lastId = id;
}
DocInfo info = model.get(id);
long val = info.val;
// the model stores deleted docs with a negated value (see below), hence the abs
long nextVal = Math.abs(val) + 1;
// the version we set on the update should determine who wins
// These versions are not derived from the actual leader update handler and hence this
// test may need to change depending on how we handle version numbers.
long version = testVersion.incrementAndGet();
// yield after getting the next version to increase the odds of updates happening out of order
if (rand.nextBoolean())
Thread.yield();
if (oper < commitPercent + deletePercent) {
// delete by id: the version is sent negated
verbose("deleting id", id, "val=", nextVal, "version", version);
Long returnedVersion = deleteAndGetVersion(Integer.toString(id), params("_version_", Long.toString(-version), DISTRIB_UPDATE_PARAM, FROM_LEADER));
// a version may not be returned for this kind of update,
// but if we do return, they had better be equal
if (returnedVersion != null) {
assertEquals(-version, returnedVersion.longValue());
}
// only update model if the version is newer
// (the delete is recorded with the positive version and a negated value)
synchronized (model) {
DocInfo currInfo = model.get(id);
if (Math.abs(version) > Math.abs(currInfo.version)) {
model.put(id, new DocInfo(version, -nextVal));
}
}
verbose("deleting id", id, "val=", nextVal, "version", version, "DONE");
} else if (oper < commitPercent + deletePercent + deleteByQueryPercent) {
// delete by query on the single id: the version is sent negated
verbose("deleteByQuery id", id, "val=", nextVal, "version", version);
Long returnedVersion = deleteByQueryAndGetVersion("id:" + Integer.toString(id), params("_version_", Long.toString(-version), DISTRIB_UPDATE_PARAM, FROM_LEADER));
// a version may not be returned for this kind of update,
// but if we do return, they had better be equal
if (returnedVersion != null) {
assertEquals(-version, returnedVersion.longValue());
}
// only update model if the version is newer
// (the delete is recorded with the positive version and a negated value)
synchronized (model) {
DocInfo currInfo = model.get(id);
if (Math.abs(version) > Math.abs(currInfo.version)) {
model.put(id, new DocInfo(version, -nextVal));
}
}
verbose("deleteByQuery id", id, "val=", nextVal, "version", version, "DONE");
} else {
// add: the explicit version travels in the document's _version_ field
verbose("adding id", id, "val=", nextVal, "version", version);
Long returnedVersion = addAndGetVersion(sdoc("id", Integer.toString(id), FIELD, Long.toString(nextVal), "_version_", Long.toString(version)), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
// if a version is returned, it must be the one we sent
if (returnedVersion != null) {
assertEquals(version, returnedVersion.longValue());
}
// only update model if the version is newer
synchronized (model) {
DocInfo currInfo = model.get(id);
if (version > currInfo.version) {
model.put(id, new DocInfo(version, nextVal));
}
}
if (VERBOSE) {
verbose("adding id", id, "val=", nextVal, "version", version, "DONE");
}
}
if (!before) {
lastId = id;
}
}
} catch (Throwable e) {
// stop every other thread and the main loop, then surface the failure
operations.set(-1L);
throw new RuntimeException(e);
}
}
};
threads.add(thread);
}
// ---- reader threads ----
for (int i = 0; i < nReadThreads; i++) {
Thread thread = new Thread("READER" + i) {
// per-thread random, seeded from the test's random
Random rand = new Random(random().nextInt());
@Override
public void run() {
try {
while (operations.get() > 0) {
// throttle reads (don't completely stop)
// the result is ignored: without a permit the reader still proceeds after at most 10 ms
readPermission.tryAcquire(10, TimeUnit.MILLISECONDS);
// bias toward a recently changed doc
int id = rand.nextInt(100) < 25 ? lastId : rand.nextInt(ndocs);
// when indexing, we update the index, then the model
// so when querying, we should first check the model, and then the index
boolean realTime = rand.nextInt(100) < percentRealtimeQuery;
DocInfo info;
if (realTime) {
// realtime reads are checked against the visible model
info = visibleModel.get(id);
} else {
// regular queries are checked against the last installed committed model
synchronized (globalLock) {
info = committedModel.get(id);
}
}
if (VERBOSE) {
verbose("querying id", id);
}
SolrQueryRequest sreq;
if (realTime) {
sreq = req("wt", "json", "qt", "/get", "ids", Integer.toString(id));
} else {
sreq = req("wt", "json", "q", "id:" + Integer.toString(id), "omitHeader", "true");
}
String response = h.query(sreq);
Map rsp = (Map) ObjectBuilder.fromJSON(response);
List doclist = (List) (((Map) rsp.get("response")).get("docs"));
if (doclist.size() == 0) {
// there's no info we can get back with a delete, so not much we can check without further synchronization
} else {
assertEquals(1, doclist.size());
long foundVal = (Long) (((Map) doclist.get(0)).get(FIELD));
long foundVer = (Long) (((Map) doclist.get(0)).get("_version_"));
// fail if the found version is older than the model's version,
// or if the versions are equal but the values differ
if (foundVer < Math.abs(info.version) || (foundVer == info.version && foundVal != info.val)) {
// if the version matches, the val must
verbose("ERROR, id=", id, "found=", response, "model", info);
assertTrue(false);
}
}
}
// NOTE(review): this check sits after the while loop, so each reader runs it at most once,
// when it is about to exit - confirm whether it was meant to be inside the loop
if (rand.nextInt(100) < percentGetLatestVersions) {
getLatestVersions();
// TODO: some sort of validation that the latest version is >= to the latest version we added?
}
} catch (Throwable e) {
// stop every other thread and the main loop, then surface the failure
operations.set(-1L);
throw new RuntimeException(e);
}
}
};
threads.add(thread);
}
for (Thread thread : threads) {
thread.start();
}
// ---- main loop: buffer updates, apply them, repeat ----
int bufferedAddsApplied = 0;
do {
assertTrue(uLog.getState() == UpdateLog.State.ACTIVE);
// before we start buffering updates, we want to point
// visibleModel away from the live model.
visibleModel = new ConcurrentHashMap<>(model);
// same lock the writers hold while committing
synchronized (stateChangeLock) {
uLog.bufferUpdates();
}
assertTrue(uLog.getState() == UpdateLog.State.BUFFERING);
// sometimes wait briefly (1-10 ms) to allow time for writers to write something
if (random().nextBoolean())
Thread.sleep(random().nextInt(10) + 1);
Future<UpdateLog.RecoveryInfo> recoveryInfoF = uLog.applyBufferedUpdates();
// a null future is tolerated: nothing is waited for or counted in that case
if (recoveryInfoF != null) {
UpdateLog.RecoveryInfo recInfo = null;
int writeThreadNumber = 0;
while (recInfo == null) {
try {
// wait a short period of time for recovery to complete (and to give a chance for more writers to concurrently add docs)
recInfo = recoveryInfoF.get(random().nextInt(100 / nWriteThreads), TimeUnit.MILLISECONDS);
} catch (TimeoutException e) {
// idle one more write thread
verbose("Operation", operations.get(), "Draining permits for write thread", writeThreadNumber);
writePermissions[writeThreadNumber++].drainPermits();
if (writeThreadNumber >= nWriteThreads) {
// if we hit the end, back up and give a few write permits
// (1 or 2 permits for the last writer, so it is not stopped completely)
writeThreadNumber--;
writePermissions[writeThreadNumber].release(random().nextInt(2) + 1);
}
// throttle readers so they don't steal too much CPU from the recovery thread
readPermission.drainPermits();
}
}
bufferedAddsApplied += recInfo.adds;
}
// put all writers back at full blast
for (Semaphore writePerm : writePermissions) {
// I don't think semaphores check for overflow, so we need to check how many remain
int neededPermits = Integer.MAX_VALUE - writePerm.availablePermits();
if (neededPermits > 0)
writePerm.release(neededPermits);
}
// put back readers at full blast and point back to live model
visibleModel = model;
int neededPermits = Integer.MAX_VALUE - readPermission.availablePermits();
if (neededPermits > 0)
readPermission.release(neededPermits);
verbose("ROUND=", operations.get());
} while (operations.decrementAndGet() > 0);
verbose("bufferedAddsApplied=", bufferedAddsApplied);
// operations is now <= 0, so the worker loops end; wait for all of them
for (Thread thread : threads) {
thread.join();
}
}
use of org.apache.solr.update.VersionInfo in project lucene-solr by apache.
the class CdcrRequestHandler method handleShardCheckpointAction.
/**
 * Handles the {@code SHARDCHECKPOINT} action: retrieves the version number of the latest entry
 * known to this core and adds it to the response.
 * <p>
 * The checkpoint is the larger of the maximum version found in the recent updates of the
 * {@link org.apache.solr.update.UpdateLog} and the maximum version found in the index through
 * the request's searcher. A computed maximum of {@code 0} is reported as {@code -1}. The value
 * is added to the response under the {@code CdcrParams.CHECKPOINT} key.
 * </p>
 *
 * @param req the incoming request; its searcher is used to read the maximum version from the index
 * @param rsp the response that receives the checkpoint value
 * @throws SolrException with {@code BAD_REQUEST} if this replica is not the leader, or with
 *         {@code SERVER_ERROR} (wrapping the underlying {@link IOException}) if the maximum
 *         version cannot be read
 */
private void handleShardCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) {
  // only the leader may answer a shard checkpoint request
  if (!leaderStateManager.amILeader()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Action '" + CdcrParams.CdcrAction.SHARDCHECKPOINT + "' sent to non-leader replica");
  }
  UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
  VersionInfo versionInfo = ulog.getVersionInfo();
  try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
    long maxVersionFromRecent = recentUpdates.getMaxRecentVersion();
    long maxVersionFromIndex = versionInfo.getMaxVersionFromIndex(req.getSearcher());
    log.info("Found maxVersionFromRecent {} maxVersionFromIndex {}", maxVersionFromRecent, maxVersionFromIndex);
    // there is no race with ongoing bootstrap because we don't expect any updates to come from the source
    long maxVersion = Math.max(maxVersionFromIndex, maxVersionFromRecent);
    // a maximum of 0 is reported as -1
    if (maxVersion == 0L) {
      maxVersion = -1;
    }
    rsp.add(CdcrParams.CHECKPOINT, maxVersion);
  } catch (IOException e) {
    // keep the IOException as the cause so the underlying failure and its stack trace are not lost
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Action '" + CdcrParams.CdcrAction.SHARDCHECKPOINT + "' could not read max version", e);
  }
}
Aggregations