Example 21 with UpdateLog

use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.

the class RealTimeGetComponent method resolveFullDocument.

/**
   * Given a partial document obtained from the transaction log (e.g. as a result of RTG), resolve to a full document
   * by populating all the partial updates that were applied on top of that last full document update.
   * 
   * @param onlyTheseFields When a non-null set of field names is passed in, the resolve process only attempts to populate
   *        the given fields in this set. When this set is null, it resolves all fields.
   * @return Returns the merged document, i.e. the resolved full document, or null if the document was not found (deleted
   *          after the resolving began)
   */
private static SolrDocument resolveFullDocument(SolrCore core, BytesRef idBytes, ReturnFields returnFields, SolrInputDocument partialDoc, List logEntry, Set<String> onlyTheseFields) throws IOException {
    if (idBytes == null || logEntry.size() != 5) {
        throw new SolrException(ErrorCode.INVALID_STATE, "Either Id field not present in partial document or log entry doesn't have previous version.");
    }
    long prevPointer = (long) logEntry.get(UpdateLog.PREV_POINTER_IDX);
    long prevVersion = (long) logEntry.get(UpdateLog.PREV_VERSION_IDX);
    // get the last full document from ulog
    UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
    long lastPrevPointer = ulog.applyPartialUpdates(idBytes, prevPointer, prevVersion, onlyTheseFields, partialDoc);
    if (lastPrevPointer == -1) {
        // full document was not found in tlog, but exists in index
        SolrDocument mergedDoc = mergePartialDocWithFullDocFromIndex(core, idBytes, returnFields, onlyTheseFields, partialDoc);
        return mergedDoc;
    } else if (lastPrevPointer > 0) {
        // We were supposed to have found the last full doc also in the tlogs, but the prevPointer links led to nowhere
        // We should reopen a new RT searcher and get the doc. This should be a rare occurrence
        Term idTerm = new Term(core.getLatestSchema().getUniqueKeyField().getName(), idBytes);
        SolrDocument mergedDoc = reopenRealtimeSearcherAndGet(core, idTerm, returnFields);
        if (mergedDoc == null) {
            // the document may have been deleted as the resolving was going on.
            return null;
        }
        return mergedDoc;
    } else {
        // i.e. lastPrevPointer==0
        assert lastPrevPointer == 0;
        // We have successfully resolved the document based off the tlogs
        return toSolrDoc(partialDoc, core.getLatestSchema());
    }
}
Also used : SolrDocument(org.apache.solr.common.SolrDocument) UpdateLog(org.apache.solr.update.UpdateLog) Term(org.apache.lucene.index.Term) SolrException(org.apache.solr.common.SolrException)
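
An UPDATE_INPLACE entry in the transaction log carries five elements: flags, version, prevPointer, prevVersion and the partial document, which is why resolveFullDocument insists on logEntry.size() == 5. Below is a minimal sketch that decodes such an entry using the same indices as above; the TlogEntryPrinter class is hypothetical and not part of Solr.

import java.util.List;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.update.UpdateLog;

public class TlogEntryPrinter {

    /** Decode an in-place update entry; ADD and DELETE entries are shorter and carry no back-pointers. */
    public static void describe(List entry) {
        // Index 0 holds the flags word; the low bits are the operation (see OPERATION_MASK usage above).
        int oper = (Integer) entry.get(0) & UpdateLog.OPERATION_MASK;
        if (oper != UpdateLog.UPDATE_INPLACE) {
            return;
        }
        long version = (long) entry.get(UpdateLog.VERSION_IDX);
        // Offset in the tlog of the previous update for the same id, and the version that update wrote.
        long prevPointer = (long) entry.get(UpdateLog.PREV_POINTER_IDX);
        long prevVersion = (long) entry.get(UpdateLog.PREV_VERSION_IDX);
        // The document itself is last and contains only the updated fields.
        SolrInputDocument partial = (SolrInputDocument) entry.get(entry.size() - 1);
        System.out.printf("v=%d updates %d field(s); previous update at pointer=%d (v=%d)%n",
                version, partial.size(), prevPointer, prevVersion);
    }
}

resolveFullDocument hands exactly these prevPointer/prevVersion values to UpdateLog.applyPartialUpdates, and the three branches above distinguish its outcomes: 0 when the chain resolved entirely inside the tlogs, -1 when it bottoms out in the index, and a positive pointer when the links lead nowhere and a fresh realtime searcher is needed.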

Example 22 with UpdateLog

use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.

the class RealTimeGetComponent method getInputDocumentFromTlog.

/** returns the SolrInputDocument from the current tlog, or DELETED if it has been deleted, or
   * null if there is no record of it in the current update log.  If null is returned, it could
   * still be in the latest index.
   * @param versionReturned If a non-null AtomicLong is passed in, it is set to the version of the update returned from the TLog.
   * @param resolveFullDocument In case the document is fetched from the tlog, it could only be a partial document if the last update
   *                  was an in-place update. In that case, should this partial document be resolved to a full document (by following
   *                  back prevPointer/prevVersion)?
   */
public static SolrInputDocument getInputDocumentFromTlog(SolrCore core, BytesRef idBytes, AtomicLong versionReturned, Set<String> onlyTheseNonStoredDVs, boolean resolveFullDocument) {
    UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
    if (ulog != null) {
        Object o = ulog.lookup(idBytes);
        if (o != null) {
            // should currently be a List<Oper,Ver,Doc/Id>
            List entry = (List) o;
            assert entry.size() >= 3;
            int oper = (Integer) entry.get(0) & UpdateLog.OPERATION_MASK;
            if (versionReturned != null) {
                versionReturned.set((long) entry.get(UpdateLog.VERSION_IDX));
            }
            switch(oper) {
                case UpdateLog.UPDATE_INPLACE:
                    assert entry.size() == 5;
                    if (resolveFullDocument) {
                        SolrInputDocument doc = (SolrInputDocument) entry.get(entry.size() - 1);
                        try {
                            // For in-place update case, we have obtained the partial document till now. We need to
                            // resolve it to a full document to be returned to the user.
                            SolrDocument sdoc = resolveFullDocument(core, idBytes, new SolrReturnFields(), doc, entry, onlyTheseNonStoredDVs);
                            if (sdoc == null) {
                                return DELETED;
                            }
                            doc = toSolrInputDocument(sdoc, core.getLatestSchema());
                            return doc;
                        } catch (IOException ex) {
                            throw new SolrException(ErrorCode.SERVER_ERROR, "Error while resolving full document. ", ex);
                        }
                    } else {
                    // fall through to ADD, so as to get only the partial document
                    }
                case UpdateLog.ADD:
                    return (SolrInputDocument) entry.get(entry.size() - 1);
                case UpdateLog.DELETE:
                    return DELETED;
                default:
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper);
            }
        }
    }
    return null;
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrDocument(org.apache.solr.common.SolrDocument) UpdateLog(org.apache.solr.update.UpdateLog) SolrDocumentList(org.apache.solr.common.SolrDocumentList) DocList(org.apache.solr.search.DocList) List(java.util.List) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) IndexFingerprint(org.apache.solr.update.IndexFingerprint) SolrReturnFields(org.apache.solr.search.SolrReturnFields) SolrException(org.apache.solr.common.SolrException)
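
A hedged caller-side sketch of this method, matching the signature shown above. The wrapper class and helper name are placeholders; in Solr itself external callers normally go through RealTimeGetComponent.getInputDocument, which hides the internal DELETED sentinel.

import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.RealTimeGetComponent;

public class TlogLookupSketch {

    /** Fetch the freshest copy of a document straight from the tlog, if it has one. */
    public static SolrInputDocument latestFromTlog(SolrCore core, String idStr) {
        // Encode the readable id the same way the unique key field indexes it.
        BytesRefBuilder idBytes = new BytesRefBuilder();
        core.getLatestSchema().getUniqueKeyField().getType().readableToIndexed(idStr, idBytes);

        AtomicLong version = new AtomicLong();
        // resolveFullDocument=true: follow prevPointer/prevVersion so an in-place
        // partial update comes back as a whole document rather than a fragment.
        SolrInputDocument doc = RealTimeGetComponent.getInputDocumentFromTlog(
                core, idBytes.get(), version, null, true);
        if (doc == null) {
            return null; // no record in the current tlog; the latest index may still have it
        }
        // Note: a deletion is reported via a DELETED sentinel document internal to the
        // component; getInputDocument() maps that sentinel to null for outside callers.
        System.out.println("tlog version for " + idStr + " = " + version.get());
        return doc;
    }
}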

Example 23 with UpdateLog

use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.

the class TestRecovery method testTruncatedLog.

//
// test that a partially written last tlog entry (that will cause problems for both reverse reading and for
// log replay) doesn't stop us from coming up, and from recovering the documents that were not cut off.
//
@Test
public void testTruncatedLog() throws Exception {
    try {
        DirectUpdateHandler2.commitOnClose = false;
        final Semaphore logReplay = new Semaphore(0);
        final Semaphore logReplayFinish = new Semaphore(0);
        UpdateLog.testing_logReplayHook = () -> {
            try {
                assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        };
        UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();
        UpdateLog ulog = h.getCore().getUpdateHandler().getUpdateLog();
        File logDir = new File(h.getCore().getUpdateHandler().getUpdateLog().getLogDir());
        clearIndex();
        assertU(commit());
        assertU(adoc("id", "F1"));
        assertU(adoc("id", "F2"));
        assertU(adoc("id", "F3"));
        h.close();
        String[] files = ulog.getLogList(logDir);
        Arrays.sort(files);
        RandomAccessFile raf = new RandomAccessFile(new File(logDir, files[files.length - 1]), "rw");
        // seek to end
        raf.seek(raf.length());
        raf.writeLong(0xffffffffffffffffL);
        raf.writeChars("This should be appended to a good log file, representing a bad partially written record.");
        raf.close();
        logReplay.release(1000);
        logReplayFinish.drainPermits();
        // this is what the corrupted log currently produces... subject to change.
        ignoreException("OutOfBoundsException");
        createCore();
        assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
        resetExceptionIgnores();
        assertJQ(req("q", "*:*"), "/response/numFound==3");
        //
        // Now test that the bad log file doesn't mess up retrieving latest versions
        //
        String v104 = getNextVersion();
        String v105 = getNextVersion();
        String v106 = getNextVersion();
        updateJ(jsonAdd(sdoc("id", "F4", "_version_", v104)), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        updateJ(jsonAdd(sdoc("id", "F5", "_version_", v105)), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        updateJ(jsonAdd(sdoc("id", "F6", "_version_", v106)), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        // This currently skips the bad log file and also returns the version of the clearIndex (del *:*)
        // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
        assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[" + v106 + "," + v105 + "," + v104 + "]");
    } finally {
        DirectUpdateHandler2.commitOnClose = true;
        UpdateLog.testing_logReplayHook = null;
        UpdateLog.testing_logReplayFinishHook = null;
    }
}
Also used : RandomAccessFile(java.io.RandomAccessFile) UpdateLog(org.apache.solr.update.UpdateLog) Semaphore(java.util.concurrent.Semaphore) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) Test(org.junit.Test)
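
The semaphore choreography here (and in the other TestRecovery methods) is worth isolating: UpdateLog exposes two static test hooks, one invoked during log replay and one when replay finishes, and the test uses them to hold replay until the corrupted tail is in place and then to wait for recovery to complete. A small sketch of that pattern follows; ReplayGate is a hypothetical helper name, while the hooks are the public static fields seen above.

import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import org.apache.solr.update.UpdateLog;

public class ReplayGate {
    final Semaphore step = new Semaphore(0);      // replay blocks until permits are released
    final Semaphore finished = new Semaphore(0);  // released once replay completes

    void install() {
        UpdateLog.testing_logReplayHook = () -> {
            try {
                // Each time the hook fires during replay, wait for the test to hand out a permit.
                if (!step.tryAcquire(60, TimeUnit.SECONDS)) {
                    throw new IllegalStateException("replay stalled");
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        };
        UpdateLog.testing_logReplayFinishHook = finished::release;
    }

    void letReplayRun() { step.release(1000); }   // effectively un-gates replay

    boolean awaitReplay(long secs) throws InterruptedException {
        return finished.tryAcquire(secs, TimeUnit.SECONDS); // true once replay has finished
    }

    void uninstall() {
        // Always clear the static hooks so later tests are unaffected, as the finally block above does.
        UpdateLog.testing_logReplayHook = null;
        UpdateLog.testing_logReplayFinishHook = null;
    }
}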

Example 24 with UpdateLog

use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.

the class TestRecovery method testRemoveOldLogs.

@Test
public void testRemoveOldLogs() throws Exception {
    try {
        DirectUpdateHandler2.commitOnClose = false;
        final Semaphore logReplay = new Semaphore(0);
        final Semaphore logReplayFinish = new Semaphore(0);
        UpdateLog.testing_logReplayHook = () -> {
            try {
                assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        };
        UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();
        clearIndex();
        assertU(commit());
        UpdateLog ulog = h.getCore().getUpdateHandler().getUpdateLog();
        File logDir = new File(h.getCore().getUpdateHandler().getUpdateLog().getLogDir());
        h.close();
        String[] files = ulog.getLogList(logDir);
        for (String file : files) {
            Files.delete(new File(logDir, file).toPath());
        }
        assertEquals(0, ulog.getLogList(logDir).length);
        createCore();
        int numIndexed = 0;
        int maxReq = 200;
        LinkedList<Long> versions = new LinkedList<>();
        int docsPerBatch = 3;
        // we don't expect to reach numRecordsToKeep as yet, so the bottleneck is still number of logs to keep
        int expectedToRetain = ulog.getMaxNumLogsToKeep() * docsPerBatch;
        int versExpected;
        for (int i = 1; i <= ulog.getMaxNumLogsToKeep() + 2; i++) {
            addDocs(docsPerBatch, numIndexed, versions);
            numIndexed += docsPerBatch;
            // not yet committed, so one more tlog could slip in
            versExpected = Math.min(numIndexed, expectedToRetain + docsPerBatch);
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
            assertU(commit());
            versExpected = Math.min(numIndexed, expectedToRetain);
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
            assertEquals(Math.min(i, ulog.getMaxNumLogsToKeep()), ulog.getLogList(logDir).length);
        }
        docsPerBatch = ulog.getNumRecordsToKeep() + 20;
        // about to commit a lot of docs, so numRecordsToKeep becomes the bottleneck
        expectedToRetain = ulog.getNumRecordsToKeep();
        addDocs(docsPerBatch, numIndexed, versions);
        numIndexed += docsPerBatch;
        versExpected = Math.min(numIndexed, expectedToRetain);
        assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
        assertU(commit());
        // we lose a log entry due to the commit record
        expectedToRetain = expectedToRetain - 1;
        versExpected = Math.min(numIndexed, expectedToRetain);
        assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, versExpected)));
        // previous logs should be gone now
        assertEquals(1, ulog.getLogList(logDir).length);
        addDocs(1, numIndexed, versions);
        numIndexed += 1;
        h.close();
        // trigger recovery, make sure that tlog reference handling is correct
        createCore();
        // test we can get versions while replay is happening
        assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
        logReplay.release(1000);
        assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
        // we lose a log entry due to the commit record made by recovery
        expectedToRetain = expectedToRetain - 1;
        assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
        docsPerBatch = ulog.getNumRecordsToKeep() + 20;
        // about to commit a lot of docs, so numRecordsToKeep becomes the bottleneck
        expectedToRetain = ulog.getNumRecordsToKeep();
        addDocs(docsPerBatch, numIndexed, versions);
        numIndexed += docsPerBatch;
        assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
        assertU(commit());
        // we lose a log entry due to the commit record
        expectedToRetain = expectedToRetain - 1;
        assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
        // previous logs should be gone now
        assertEquals(1, ulog.getLogList(logDir).length);
        //
        // test that a corrupt tlog file doesn't stop us from coming up, or seeing versions before that tlog file.
        //
        // don't add this to the versions list because we are going to lose it...
        addDocs(1, numIndexed, new LinkedList<Long>());
        h.close();
        files = ulog.getLogList(logDir);
        Arrays.sort(files);
        RandomAccessFile raf = new RandomAccessFile(new File(logDir, files[files.length - 1]), "rw");
        raf.writeChars("This is a trashed log file that really shouldn't work at all, but we'll see...");
        raf.close();
        ignoreException("Failure to open existing");
        createCore();
        // we should still be able to get the list of versions (not including the trashed log file)
        assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, expectedToRetain)));
        resetExceptionIgnores();
    } finally {
        DirectUpdateHandler2.commitOnClose = true;
        UpdateLog.testing_logReplayHook = null;
        UpdateLog.testing_logReplayFinishHook = null;
    }
}
Also used : RandomAccessFile(java.io.RandomAccessFile) UpdateLog(org.apache.solr.update.UpdateLog) Semaphore(java.util.concurrent.Semaphore) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) LinkedList(java.util.LinkedList) Test(org.junit.Test)
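
The test juggles two retention limits. maxNumLogsToKeep caps how many tlog files are kept, so with small batches expectedToRetain is maxNumLogsToKeep * docsPerBatch; once a single batch exceeds numRecordsToKeep, the record limit becomes the bottleneck instead. Two corrections recur: before a commit one extra tlog may still be counted (the + docsPerBatch), and every commit writes its own record into the newest tlog, costing one retained version (the - 1). A compact restatement of the basic bound, as a hedged sketch rather than Solr API:

import org.apache.solr.update.UpdateLog;

public class RetentionBound {

    /** Upper bound on versions the tlogs can still serve, before the per-commit adjustments noted above. */
    public static int expectedToRetain(UpdateLog ulog, int docsPerBatch) {
        // Small batches: the number of tlog files is the bottleneck.
        int byLogCount = ulog.getMaxNumLogsToKeep() * docsPerBatch;
        // Large batches: the number of retained records is the bottleneck.
        int byRecordCount = ulog.getNumRecordsToKeep();
        return Math.min(byLogCount, byRecordCount);
    }
}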

Example 25 with UpdateLog

use of org.apache.solr.update.UpdateLog in project lucene-solr by apache.

the class TestRecoveryHdfs method testBufferingFlags.

@Test
public void testBufferingFlags() throws Exception {
    DirectUpdateHandler2.commitOnClose = false;
    final Semaphore logReplayFinish = new Semaphore(0);
    UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();
    SolrQueryRequest req = req();
    UpdateHandler uhandler = req.getCore().getUpdateHandler();
    UpdateLog ulog = uhandler.getUpdateLog();
    try {
        clearIndex();
        assertU(commit());
        assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
        ulog.bufferUpdates();
        // simulate updates from a leader
        updateJ(jsonAdd(sdoc("id", "Q1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        updateJ(jsonAdd(sdoc("id", "Q2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        updateJ(jsonAdd(sdoc("id", "Q3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
        req.close();
        h.close();
        createCore();
        req = req();
        uhandler = req.getCore().getUpdateHandler();
        ulog = uhandler.getUpdateLog();
        // wait for replay to finish
        logReplayFinish.acquire();
        // since we died while buffering, we should see this last
        assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0);
        //
        // Try again to ensure that the previous log replay didn't wipe out our flags
        //
        req.close();
        h.close();
        createCore();
        req = req();
        uhandler = req.getCore().getUpdateHandler();
        ulog = uhandler.getUpdateLog();
        assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0);
        // now do some normal non-buffered adds
        updateJ(jsonAdd(sdoc("id", "Q4", "_version_", "114")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        updateJ(jsonAdd(sdoc("id", "Q5", "_version_", "115")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        updateJ(jsonAdd(sdoc("id", "Q6", "_version_", "116")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        assertU(commit());
        req.close();
        h.close();
        createCore();
        req = req();
        uhandler = req.getCore().getUpdateHandler();
        ulog = uhandler.getUpdateLog();
        assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0);
        ulog.bufferUpdates();
        // simulate receiving no updates
        ulog.applyBufferedUpdates();
        // do another add to make sure flags are back to normal
        updateJ(jsonAdd(sdoc("id", "Q7", "_version_", "117")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
        req.close();
        h.close();
        createCore();
        req = req();
        uhandler = req.getCore().getUpdateHandler();
        ulog = uhandler.getUpdateLog();
        // check flags on Q7
        assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0);
        logReplayFinish.acquire();
        // leave each test method in a good state
        assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
    } finally {
        DirectUpdateHandler2.commitOnClose = true;
        UpdateLog.testing_logReplayHook = null;
        UpdateLog.testing_logReplayFinishHook = null;
        req().close();
    }
}
Also used : UpdateHandler(org.apache.solr.update.UpdateHandler) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) HdfsUpdateLog(org.apache.solr.update.HdfsUpdateLog) UpdateLog(org.apache.solr.update.UpdateLog) Semaphore(java.util.concurrent.Semaphore) Test(org.junit.Test)
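
Taken together, the test walks the update log's buffering state machine: bufferUpdates() moves it from ACTIVE to BUFFERING, updates arriving while buffering are written to the tlog but not applied, dying in that state leaves FLAG_GAP visible in getStartingOperation() on the next startup, and applyBufferedUpdates() replays whatever was buffered before the log returns to ACTIVE. A hedged sketch of that lifecycle; the BufferingSketch class is hypothetical, the UpdateLog calls are the ones exercised above.

import java.util.concurrent.Future;
import org.apache.solr.update.UpdateLog;

public class BufferingSketch {

    /** Minimal walk through the buffering states this test asserts. */
    public static void bufferThenApply(UpdateLog ulog) throws Exception {
        assert ulog.getState() == UpdateLog.State.ACTIVE;

        ulog.bufferUpdates();                       // start recording updates without applying them
        assert ulog.getState() == UpdateLog.State.BUFFERING;

        // ... updates from the leader arrive here and are written to the tlog only ...

        Future<?> replay = ulog.applyBufferedUpdates();  // null if nothing was buffered,
                                                         // as in the "simulate receiving no updates" step above
        if (replay != null) {
            replay.get();                           // wait for the buffered updates to be applied
        }
        assert ulog.getState() == UpdateLog.State.ACTIVE;

        // If the node dies while BUFFERING instead, the next startup sees a gap:
        // (ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0, exactly what the test checks.
    }
}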

Aggregations

UpdateLog (org.apache.solr.update.UpdateLog): 34
SolrQueryRequest (org.apache.solr.request.SolrQueryRequest): 16
Test (org.junit.Test): 15
Semaphore (java.util.concurrent.Semaphore): 14
UpdateHandler (org.apache.solr.update.UpdateHandler): 12
IOException (java.io.IOException): 11
SolrException (org.apache.solr.common.SolrException): 11
SolrCore (org.apache.solr.core.SolrCore): 8
File (java.io.File): 6
ArrayList (java.util.ArrayList): 6
IndexFingerprint (org.apache.solr.update.IndexFingerprint): 6
RandomAccessFile (java.io.RandomAccessFile): 5
Replica (org.apache.solr.common.cloud.Replica): 5
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 5
SolrDocument (org.apache.solr.common.SolrDocument): 4
ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps): 4
SolrParams (org.apache.solr.common.params.SolrParams): 4
NamedList (org.apache.solr.common.util.NamedList): 4
URISyntaxException (java.net.URISyntaxException): 3
HashMap (java.util.HashMap): 3