Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class RealTimeGetComponent, method resolveFullDocument:
/**
 * Given a partial document obtained from the transaction log (e.g. as a result of RTG), resolve it to a full
 * document by applying all the partial updates that were written on top of the last full document update.
 *
 * @param onlyTheseFields When a non-null set of field names is passed in, the resolve process only attempts to
 *        populate the given fields in this set. When this set is null, it resolves all fields.
 * @return the merged document, i.e. the resolved full document, or null if the document was not found (deleted
 *         after the resolving began)
 */
private static SolrDocument resolveFullDocument(SolrCore core, BytesRef idBytes, ReturnFields returnFields,
    SolrInputDocument partialDoc, List logEntry, Set<String> onlyTheseFields) throws IOException {
  if (idBytes == null || logEntry.size() != 5) {
    throw new SolrException(ErrorCode.INVALID_STATE,
        "Either Id field not present in partial document or log entry doesn't have previous version.");
  }
  long prevPointer = (long) logEntry.get(UpdateLog.PREV_POINTER_IDX);
  long prevVersion = (long) logEntry.get(UpdateLog.PREV_VERSION_IDX);

  // get the last full document from the ulog
  UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
  long lastPrevPointer = ulog.applyPartialUpdates(idBytes, prevPointer, prevVersion, onlyTheseFields, partialDoc);

  if (lastPrevPointer == -1) {
    // the full document was not found in the tlog, but it exists in the index
    return mergePartialDocWithFullDocFromIndex(core, idBytes, returnFields, onlyTheseFields, partialDoc);
  } else if (lastPrevPointer > 0) {
    // We were supposed to have found the last full doc in the tlogs as well, but the prevPointer links led nowhere.
    // Reopen a new RT searcher and get the doc; this should be a rare occurrence.
    Term idTerm = new Term(core.getLatestSchema().getUniqueKeyField().getName(), idBytes);
    SolrDocument mergedDoc = reopenRealtimeSearcherAndGet(core, idTerm, returnFields);
    if (mergedDoc == null) {
      // the document may have been deleted while the resolving was going on
      return null;
    }
    return mergedDoc;
  } else { // i.e. lastPrevPointer == 0
    assert lastPrevPointer == 0;
    // We have successfully resolved the document based off the tlogs
    return toSolrDoc(partialDoc, core.getLatestSchema());
  }
}
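The size-5 check and index constants above imply a fixed layout for an in-place tlog entry. Below is a minimal standalone sketch of decoding such an entry; the index and mask values are assumptions chosen for illustration, not copied from UpdateLog, and the partial document is stubbed with a plain string:

import java.util.Arrays;
import java.util.List;

public class TlogEntrySketch {
  // Layout mirroring the code above: [flags|oper, version, prevPointer, prevVersion, partialDoc].
  // These index/mask values are illustrative assumptions, not UpdateLog's real constants.
  static final int FLAGS_IDX = 0;
  static final int VERSION_IDX = 1;
  static final int PREV_POINTER_IDX = 2;
  static final int PREV_VERSION_IDX = 3;
  static final int OPERATION_MASK = 0x0f;
  static final int UPDATE_INPLACE = 0x08;

  public static void main(String[] args) {
    // A fabricated in-place entry: operation, version, pointer/version of the previous update, partial doc.
    List<Object> entry = Arrays.asList(UPDATE_INPLACE, 103L, 42L, 102L, "partialDoc");
    int oper = (Integer) entry.get(FLAGS_IDX) & OPERATION_MASK;
    if (oper == UPDATE_INPLACE && entry.size() == 5) {
      long prevPointer = (Long) entry.get(PREV_POINTER_IDX);
      long prevVersion = (Long) entry.get(PREV_VERSION_IDX);
      System.out.println("follow the chain at pointer=" + prevPointer + ", version=" + prevVersion);
    }
  }
}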
Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class RealTimeGetComponent, method getInputDocumentFromTlog:
/**
 * Returns the SolrInputDocument from the current tlog, or DELETED if it has been deleted, or
 * null if there is no record of it in the current update log. If null is returned, it could
 * still be in the latest index.
 *
 * @param versionReturned If a non-null AtomicLong is passed in, it is set to the version of the update
 *        returned from the tlog.
 * @param resolveFullDocument If the document is fetched from the tlog, it may be only a partial document
 *        when the last update was an in-place update. In that case, should this partial document be
 *        resolved to a full document (by following back prevPointer/prevVersion)?
 */
public static SolrInputDocument getInputDocumentFromTlog(SolrCore core, BytesRef idBytes, AtomicLong versionReturned,
    Set<String> onlyTheseNonStoredDVs, boolean resolveFullDocument) {
  UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
  if (ulog != null) {
    Object o = ulog.lookup(idBytes);
    if (o != null) {
      // should currently be a List<Oper,Ver,Doc/Id>
      List entry = (List) o;
      assert entry.size() >= 3;
      int oper = (Integer) entry.get(0) & UpdateLog.OPERATION_MASK;
      if (versionReturned != null) {
        versionReturned.set((long) entry.get(UpdateLog.VERSION_IDX));
      }
      switch (oper) {
        case UpdateLog.UPDATE_INPLACE:
          assert entry.size() == 5;
          if (resolveFullDocument) {
            SolrInputDocument doc = (SolrInputDocument) entry.get(entry.size() - 1);
            try {
              // For the in-place update case, we have only obtained the partial document so far. We need
              // to resolve it to a full document before returning it to the user.
              SolrDocument sdoc = resolveFullDocument(core, idBytes, new SolrReturnFields(), doc, entry, onlyTheseNonStoredDVs);
              if (sdoc == null) {
                return DELETED;
              }
              doc = toSolrInputDocument(sdoc, core.getLatestSchema());
              return doc;
            } catch (IOException ex) {
              throw new SolrException(ErrorCode.SERVER_ERROR, "Error while resolving full document. ", ex);
            }
          } else {
            // fall through to ADD, so as to get only the partial document
          }
        case UpdateLog.ADD:
          return (SolrInputDocument) entry.get(entry.size() - 1);
        case UpdateLog.DELETE:
          return DELETED;
        default:
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper);
      }
    }
  }
  return null;
}
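The method has a three-way contract: a document, the DELETED sentinel, or null meaning "no tlog record, check the index". Here is a standalone sketch of a caller handling all three outcomes, with a stub standing in for the real lookup (all names are illustrative, not Solr API):

import java.util.concurrent.atomic.AtomicLong;

public class TlogLookupSketch {
  // Sentinel object, playing the role of RealTimeGetComponent's DELETED marker.
  static final Object DELETED = new Object();

  // Stub lookup: returns a document, DELETED, or null ("no record in the tlog").
  static Object getFromTlog(String id, AtomicLong versionReturned) {
    versionReturned.set(103L); // pretend the tlog entry carried this version
    return "doc-" + id;
  }

  public static void main(String[] args) {
    AtomicLong version = new AtomicLong();
    Object result = getFromTlog("Q1", version);
    if (result == DELETED) {
      System.out.println("the most recent tlog record for this id was a delete");
    } else if (result == null) {
      System.out.println("no tlog record; fall back to a realtime index lookup");
    } else {
      System.out.println("found " + result + " at version " + version.get());
    }
  }
}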
Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class TestRecovery, method testTruncatedLog:
//
// Test that a partially written last tlog entry (which will cause problems both for reverse reading and for
// log replay) doesn't stop us from coming up, or from recovering the documents that were not cut off.
//
@Test
public void testTruncatedLog() throws Exception {
  try {
    DirectUpdateHandler2.commitOnClose = false;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);
    UpdateLog.testing_logReplayHook = () -> {
      try {
        assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    };
    UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();

    UpdateLog ulog = h.getCore().getUpdateHandler().getUpdateLog();
    File logDir = new File(h.getCore().getUpdateHandler().getUpdateLog().getLogDir());

    clearIndex();
    assertU(commit());

    assertU(adoc("id", "F1"));
    assertU(adoc("id", "F2"));
    assertU(adoc("id", "F3"));

    h.close();
    String[] files = ulog.getLogList(logDir);
    Arrays.sort(files);
    RandomAccessFile raf = new RandomAccessFile(new File(logDir, files[files.length - 1]), "rw");
    raf.seek(raf.length()); // seek to end
    raf.writeLong(0xffffffffffffffffL);
    raf.writeChars("This should be appended to a good log file, representing a bad partially written record.");
    raf.close();

    logReplay.release(1000);
    logReplayFinish.drainPermits();
    // this is what the corrupted log currently produces... subject to change
    ignoreException("OutOfBoundsException");
    createCore();
    assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
    resetExceptionIgnores();
    assertJQ(req("q", "*:*"), "/response/numFound==3");

    //
    // Now test that the bad log file doesn't mess up retrieving the latest versions
    //
    String v104 = getNextVersion();
    String v105 = getNextVersion();
    String v106 = getNextVersion();

    updateJ(jsonAdd(sdoc("id", "F4", "_version_", v104)), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    updateJ(jsonAdd(sdoc("id", "F5", "_version_", v105)), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    updateJ(jsonAdd(sdoc("id", "F6", "_version_", v106)), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

    // This currently skips the bad log file and also returns the version of the clearIndex (del *:*)
    // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
    assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[" + v106 + "," + v105 + "," + v104 + "]");
  } finally {
    DirectUpdateHandler2.commitOnClose = true;
    UpdateLog.testing_logReplayHook = null;
    UpdateLog.testing_logReplayFinishHook = null;
  }
}
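The corruption step in the middle of the test is just a raw append past the end of a well-formed file. A standalone sketch of the same technique against a throwaway temp file, using try-with-resources in place of the test's explicit close():

import java.io.File;
import java.io.RandomAccessFile;

public class TruncatedRecordSketch {
  public static void main(String[] args) throws Exception {
    File log = File.createTempFile("tlog-sketch", ".log");
    try (RandomAccessFile raf = new RandomAccessFile(log, "rw")) {
      raf.seek(raf.length());             // position at the current end of the file
      raf.writeLong(0xffffffffffffffffL); // a marker no valid record would start with
      raf.writeChars("a bad, partially written record");
    }
    System.out.println("file is now " + log.length() + " bytes: " + log);
  }
}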
Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class TestRecovery, method testRemoveOldLogs:
@Test
public void testRemoveOldLogs() throws Exception {
  try {
    DirectUpdateHandler2.commitOnClose = false;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);
    UpdateLog.testing_logReplayHook = () -> {
      try {
        assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    };
    UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();

    clearIndex();
    assertU(commit());

    UpdateLog ulog = h.getCore().getUpdateHandler().getUpdateLog();
    File logDir = new File(h.getCore().getUpdateHandler().getUpdateLog().getLogDir());

    h.close();

    String[] files = ulog.getLogList(logDir);
    for (String file : files) {
      Files.delete(new File(logDir, file).toPath());
    }
    assertEquals(0, ulog.getLogList(logDir).length);

    createCore();

    int numIndexed = 0;
    int maxReq = 200;
    LinkedList<Long> versions = new LinkedList<>();

    int docsPerBatch = 3;
    // we don't expect to reach numRecordsToKeep yet, so the bottleneck is still the number of logs to keep
    int expectedToRetain = ulog.getMaxNumLogsToKeep() * docsPerBatch;
    int versExpected;
    for (int i = 1; i <= ulog.getMaxNumLogsToKeep() + 2; i++) {
      addDocs(docsPerBatch, numIndexed, versions);
      numIndexed += docsPerBatch;
      // not yet committed, so one more tlog could slip in
      versExpected = Math.min(numIndexed, expectedToRetain + docsPerBatch);
      assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
      assertU(commit());
      versExpected = Math.min(numIndexed, expectedToRetain);
      assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
      assertEquals(Math.min(i, ulog.getMaxNumLogsToKeep()), ulog.getLogList(logDir).length);
    }

    docsPerBatch = ulog.getNumRecordsToKeep() + 20;
    // about to commit a lot of docs, so numRecordsToKeep becomes the bottleneck
    expectedToRetain = ulog.getNumRecordsToKeep();
    addDocs(docsPerBatch, numIndexed, versions);
    numIndexed += docsPerBatch;
    versExpected = Math.min(numIndexed, expectedToRetain);
    assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
    assertU(commit());
    // we lose a log entry due to the commit record
    expectedToRetain = expectedToRetain - 1;
    versExpected = Math.min(numIndexed, expectedToRetain);
    assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
    // previous logs should be gone now
    assertEquals(1, ulog.getLogList(logDir).length);

    addDocs(1, numIndexed, versions);
    numIndexed += 1;
    h.close();
    // trigger recovery, make sure that tlog reference handling is correct
    createCore();
    // test that we can get versions while replay is happening
    assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));

    logReplay.release(1000);
    assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
    // we lose a log entry due to the commit record made by recovery
    expectedToRetain = expectedToRetain - 1;
    assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));

    docsPerBatch = ulog.getNumRecordsToKeep() + 20;
    // about to commit a lot of docs, so numRecordsToKeep becomes the bottleneck
    expectedToRetain = ulog.getNumRecordsToKeep();
    addDocs(docsPerBatch, numIndexed, versions);
    numIndexed += docsPerBatch;
    assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
    assertU(commit());
    // we lose a log entry due to the commit record
    expectedToRetain = expectedToRetain - 1;
    assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
    // previous logs should be gone now
    assertEquals(1, ulog.getLogList(logDir).length);

    //
    // test that a corrupt tlog file doesn't stop us from coming up, or from seeing versions before that tlog file
    //
    // don't add this to the versions list because we are going to lose it...
    addDocs(1, numIndexed, new LinkedList<Long>());
    h.close();

    files = ulog.getLogList(logDir);
    Arrays.sort(files);
    RandomAccessFile raf = new RandomAccessFile(new File(logDir, files[files.length - 1]), "rw");
    raf.writeChars("This is a trashed log file that really shouldn't work at all, but we'll see...");
    raf.close();

    ignoreException("Failure to open existing");
    createCore();
    // we should still be able to get the list of versions (not including the trashed log file)
    assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
    resetExceptionIgnores();
  } finally {
    DirectUpdateHandler2.commitOnClose = true;
    UpdateLog.testing_logReplayHook = null;
    UpdateLog.testing_logReplayFinishHook = null;
  }
}
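The retention assertions interleave two ceilings: before a commit the still-open tlog can hold one extra batch, and after the commit only maxNumLogsToKeep full batches survive (until batches grow large enough for numRecordsToKeep to take over). A standalone sketch of that arithmetic, with the limits hard-coded to illustrative values rather than read from a real UpdateLog:

public class RetentionMathSketch {
  public static void main(String[] args) {
    int maxNumLogsToKeep = 10; // illustrative; the test reads this from ulog.getMaxNumLogsToKeep()
    int docsPerBatch = 3;
    int expectedToRetain = maxNumLogsToKeep * docsPerBatch;
    int numIndexed = 0;
    for (int i = 1; i <= maxNumLogsToKeep + 2; i++) {
      numIndexed += docsPerBatch;
      // before the commit, one more (still-open) tlog can slip in
      int beforeCommit = Math.min(numIndexed, expectedToRetain + docsPerBatch);
      // after the commit, the cap is exactly maxNumLogsToKeep full batches
      int afterCommit = Math.min(numIndexed, expectedToRetain);
      System.out.printf("batch %d: indexed=%d, visible before commit=%d, after commit=%d%n",
          i, numIndexed, beforeCommit, afterCommit);
    }
  }
}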
Use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.
The class TestRecoveryHdfs, method testBufferingFlags:
@Test
public void testBufferingFlags() throws Exception {
  DirectUpdateHandler2.commitOnClose = false;
  final Semaphore logReplayFinish = new Semaphore(0);
  UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();

  SolrQueryRequest req = req();
  UpdateHandler uhandler = req.getCore().getUpdateHandler();
  UpdateLog ulog = uhandler.getUpdateLog();
  try {
    clearIndex();
    assertU(commit());
    assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
    ulog.bufferUpdates();

    // simulate updates from a leader
    updateJ(jsonAdd(sdoc("id", "Q1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    updateJ(jsonAdd(sdoc("id", "Q2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    updateJ(jsonAdd(sdoc("id", "Q3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

    req.close();
    h.close();
    createCore();

    req = req();
    uhandler = req.getCore().getUpdateHandler();
    ulog = uhandler.getUpdateLog();

    // wait for replay to finish
    logReplayFinish.acquire();
    // since we died while buffering, we should see this last
    assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0);

    //
    // Try again to ensure that the previous log replay didn't wipe out our flags
    //
    req.close();
    h.close();
    createCore();

    req = req();
    uhandler = req.getCore().getUpdateHandler();
    ulog = uhandler.getUpdateLog();
    assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0);

    // now do some normal non-buffered adds
    updateJ(jsonAdd(sdoc("id", "Q4", "_version_", "114")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    updateJ(jsonAdd(sdoc("id", "Q5", "_version_", "115")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    updateJ(jsonAdd(sdoc("id", "Q6", "_version_", "116")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
    assertU(commit());

    req.close();
    h.close();
    createCore();

    req = req();
    uhandler = req.getCore().getUpdateHandler();
    ulog = uhandler.getUpdateLog();
    assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0);

    ulog.bufferUpdates();
    // simulate receiving no updates
    ulog.applyBufferedUpdates();
    // do another add to make sure flags are back to normal
    updateJ(jsonAdd(sdoc("id", "Q7", "_version_", "117")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

    req.close();
    h.close();
    createCore();

    req = req();
    uhandler = req.getCore().getUpdateHandler();
    ulog = uhandler.getUpdateLog();
    // check flags on Q7
    assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0);

    logReplayFinish.acquire();
    // leave each test method in a good state
    assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
  } finally {
    DirectUpdateHandler2.commitOnClose = true;
    UpdateLog.testing_logReplayHook = null;
    UpdateLog.testing_logReplayFinishHook = null;
    req().close();
  }
}
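The FLAG_GAP assertions are plain bitmask tests on the flags stored alongside the starting operation. A standalone sketch of that masking follows; the flag and mask values here are assumptions for illustration, not Solr's actual constants:

public class GapFlagSketch {
  static final int OPERATION_MASK = 0x0f; // low bits: the operation itself (assumed layout)
  static final int FLAG_GAP = 0x10;       // flag bit: replay started from a buffering gap (assumed value)

  public static void main(String[] args) {
    int startingOperation = 0x01 | FLAG_GAP; // e.g. an ADD recorded while buffering
    boolean diedWhileBuffering = (startingOperation & FLAG_GAP) != 0;
    int oper = startingOperation & OPERATION_MASK;
    System.out.println("oper=" + oper + ", gap flag set=" + diedWhileBuffering);
  }
}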