Search in sources :

Example 1 with Location

use of org.opensearch.index.translog.Translog.Location in project OpenSearch by opensearch-project.

the class TranslogTests method testLocationComparison.

/**
 * Adds a random number of index operations (occasionally rolling the translog generation
 * in between), then checks two things: the greatest of the returned locations, as chosen
 * by the {@code max} helper, belongs to the current file generation, and the last
 * operation read from a {@code SortedSnapshot} carries the payload of the last write.
 *
 * @throws IOException if adding to, rolling, or reading the translog fails
 */
public void testLocationComparison() throws IOException {
    List<Translog.Location> locations = new ArrayList<>();
    int translogOperations = randomIntBetween(10, 100);
    int count = 0;
    for (int op = 0; op < translogOperations; op++) {
        locations.add(translog.add(new Translog.Index("test", "" + op, op, primaryTerm.get(), Integer.toString(++count).getBytes(Charset.forName("UTF-8")))));
        // only roll while more operations will follow, so the final add lands after the last roll
        if (rarely() && translogOperations > op + 1) {
            translog.rollGeneration();
        }
    }
    // shuffle so the maximum has to be found by comparison rather than by insertion order
    Collections.shuffle(locations, random());
    Translog.Location max = locations.get(0);
    for (Translog.Location location : locations) {
        max = max(max, location);
    }
    // expected value first, so a failure message labels the two values correctly
    assertEquals(translog.currentFileGeneration(), max.generation);
    try (Translog.Snapshot snap = new SortedSnapshot(translog.newSnapshot())) {
        Translog.Operation next;
        Translog.Operation maxOp = null;
        // drain the snapshot; the last operation it yields is kept in maxOp
        while ((next = snap.next()) != null) {
            maxOp = next;
        }
        assertNotNull(maxOp);
        // count holds the payload number of the final operation written above
        assertEquals(Integer.toString(count), maxOp.getSource().source.utf8ToString());
    }
}
Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) Location(org.opensearch.index.translog.Translog.Location) Location(org.opensearch.index.translog.Translog.Location)

Example 2 with Location

use of org.opensearch.index.translog.Translog.Location in project OpenSearch by opensearch-project.

the class TranslogTests method testTragicEventCanBeAnyException.

/**
 * Writes one document successfully, switches the {@code FailSwitch} to fail always, and
 * then expects the next add/sync to throw either an {@code UnknownException} directly or
 * a {@code TranslogException} caused by one. Afterwards the translog must report itself
 * closed and expose an {@code UnknownException} as its tragic exception.
 *
 * @throws IOException if the initial, non-failing translog operations fail
 */
public void testTragicEventCanBeAnyException() throws IOException {
    final Path translogDir = createTempDir();
    final FailSwitch failSwitch = new FailSwitch();
    final TranslogConfig translogConfig = getTranslogConfig(translogDir);
    final Translog translog = getFailableTranslog(failSwitch, translogConfig, false, true, null, createTranslogDeletionPolicy());
    // the line-file documents are fairly large, so writes regularly cross buffer borders
    final LineFileDocs docs = new LineFileDocs(random());
    final Charset utf8 = Charset.forName("UTF-8");

    final byte[] firstSource = docs.nextDoc().toString().getBytes(utf8);
    translog.add(new Translog.Index("test", "1", 0, primaryTerm.get(), firstSource));

    failSwitch.failAlways();
    try {
        final byte[] secondSource = docs.nextDoc().toString().getBytes(utf8);
        final Translog.Location secondLocation = translog.add(new Translog.Index("test", "2", 1, primaryTerm.get(), secondSource));
        if (randomBoolean() == false) {
            translog.sync();
        } else {
            translog.ensureSynced(secondLocation);
        }
        // TODO once we have a mock FS that can simulate we can also fail on plain sync
        fail("WTF");
    } catch (UnknownException expected) {
        // the injected failure surfaced directly - nothing more to verify here
    } catch (TranslogException wrapped) {
        // the injected failure surfaced wrapped - the cause must be the injected exception
        assertTrue(wrapped.getCause() instanceof UnknownException);
    }

    assertFalse(translog.isOpen());
    assertTrue(translog.getTragicException() instanceof UnknownException);
}
Also used : Path(java.nio.file.Path) Location(org.opensearch.index.translog.Translog.Location) LineFileDocs(org.apache.lucene.util.LineFileDocs)

Example 3 with Location

use of org.opensearch.index.translog.Translog.Location in project OpenSearch by opensearch-project.

the class TranslogTests method testSyncUpToStream.

/**
 * Runs several rounds in which a random batch of operations is written (with occasional
 * generation rolls) and the collected locations are shuffled. Each round then takes one
 * of three paths: sync through {@code ensureSynced(Stream)}, roll the generation, or call
 * {@code sync()} directly. In every case no single location may still need syncing at the
 * end of the round.
 *
 * @throws IOException if a translog operation fails
 */
public void testSyncUpToStream() throws IOException {
    final int rounds = randomIntBetween(5, 10);
    for (int round = 0; round < rounds; round++) {
        final int numOps = randomIntBetween(10, 100);
        int count = 0;
        ArrayList<Location> locations = new ArrayList<>();
        for (int seqNo = 0; seqNo < numOps; seqNo++) {
            if (rarely()) {
                translog.rollGeneration();
            }
            count++;
            final byte[] payload = Integer.toString(count).getBytes(Charset.forName("UTF-8"));
            final Translog.Index indexOp = new Translog.Index("test", Integer.toString(seqNo), seqNo, primaryTerm.get(), payload);
            locations.add(translog.add(indexOp));
        }
        Collections.shuffle(locations, random());

        if (randomBoolean()) {
            // path 1: nothing has been synced yet, so the stream-based call has to do the work
            assertTrue("at least one operation pending", translog.syncNeeded());
            final boolean syncedNow = translog.ensureSynced(locations.stream());
            assertTrue("this operation has not been synced", syncedNow);
            // the stream includes the last location, so everything should be synced now
            assertFalse("the last call to ensureSycned synced all previous ops", translog.syncNeeded());
        } else if (rarely()) {
            // path 2: roll the generation instead of syncing explicitly
            translog.rollGeneration();
            final boolean syncedAfterRoll = translog.ensureSynced(locations.stream());
            assertFalse("location is from a previous translog - already synced", syncedAfterRoll);
            assertFalse("no sync needed since no operations in current translog", translog.syncNeeded());
        } else {
            // path 3: a plain sync leaves nothing for ensureSynced to do
            translog.sync();
            final boolean syncedAgain = translog.ensureSynced(locations.stream());
            assertFalse("translog has been synced already", syncedAgain);
        }

        for (Location written : locations) {
            final boolean neededSync = translog.ensureSynced(written);
            assertFalse("all of the locations should be synced: " + written, neededSync);
        }
    }
}
Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) Location(org.opensearch.index.translog.Translog.Location) Location(org.opensearch.index.translog.Translog.Location)

Example 4 with Location

use of org.opensearch.index.translog.Translog.Location in project OpenSearch by opensearch-project.

the class TranslogTests method testSyncConcurrently.

/**
 * Starts between two and eight threads that each add small batches of operations to a
 * fresh translog and then sync, either via {@code ensureSynced} on the batch's last
 * location or via {@code sync()}. After every batch the thread checks that each sequence
 * number was reported to the persisted-seq-no callback and that the last synced
 * checkpoint covers the batch's offset and sequence numbers.
 *
 * @throws Exception if translog creation fails or joining a worker thread is interrupted
 */
public void testSyncConcurrently() throws Exception {
    final Path translogDir = createTempDir("translog");
    final TranslogConfig config = getTranslogConfig(translogDir);
    final String translogUUID = Translog.createEmptyTranslog(config.getTranslogPath(), SequenceNumbers.NO_OPS_PERFORMED, shardId, primaryTerm.get());
    final Set<Long> persistedSeqNos = ConcurrentCollections.newConcurrentSet();
    final AtomicLong lastGlobalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED);
    // randomly either advances the global checkpoint or reports the current value
    final LongSupplier globalCheckpointSupplier = () -> randomBoolean()
        ? lastGlobalCheckpoint.addAndGet(randomIntBetween(1, 100))
        : lastGlobalCheckpoint.get();
    try (Translog translog = new Translog(config, translogUUID, createTranslogDeletionPolicy(config.getIndexSettings()), globalCheckpointSupplier, primaryTerm::get, persistedSeqNos::add)) {
        final Thread[] workers = new Thread[between(2, 8)];
        final Phaser startGate = new Phaser(workers.length);
        final AtomicLong nextSeqNo = new AtomicLong();
        final Runnable worker = () -> {
            // hold every worker until all of them have arrived
            startGate.arriveAndAwaitAdvance();
            final int iterations = randomIntBetween(10, 100);
            for (int iteration = 0; iteration < iterations; iteration++) {
                final int batchSize = between(1, 10);
                final List<Translog.Operation> batch = new ArrayList<>(batchSize);
                for (int n = 0; n < batchSize; n++) {
                    batch.add(new Translog.Index("test", "1", nextSeqNo.incrementAndGet(), primaryTerm.get(), new byte[] { 1 }));
                }
                try {
                    Translog.Location lastLocation = null;
                    for (Translog.Operation operation : batch) {
                        lastLocation = translog.add(operation);
                    }
                    assertNotNull(lastLocation);
                    // read the global checkpoint before syncing; it is compared to the synced checkpoint below
                    final long globalCheckpointBeforeSync = lastGlobalCheckpoint.get();
                    boolean synced = true;
                    if (randomBoolean()) {
                        synced = translog.ensureSynced(lastLocation);
                    } else {
                        translog.sync();
                    }
                    for (Translog.Operation operation : batch) {
                        assertThat("seq# " + operation.seqNo() + " was not marked as persisted", persistedSeqNos, hasItem(operation.seqNo()));
                    }
                    final Checkpoint lastSynced = translog.getLastSyncedCheckpoint();
                    assertThat(lastSynced.offset, greaterThanOrEqualTo(lastLocation.translogLocation));
                    for (Translog.Operation operation : batch) {
                        assertThat(lastSynced.minSeqNo, lessThanOrEqualTo(operation.seqNo()));
                        assertThat(lastSynced.maxSeqNo, greaterThanOrEqualTo(operation.seqNo()));
                    }
                    if (synced) {
                        assertThat(lastSynced.globalCheckpoint, greaterThanOrEqualTo(globalCheckpointBeforeSync));
                    }
                } catch (Exception e) {
                    throw new AssertionError(e);
                }
            }
        };
        for (int t = 0; t < workers.length; t++) {
            workers[t] = new Thread(worker);
            workers[t].start();
        }
        for (Thread workerThread : workers) {
            workerThread.join();
        }
    }
}
Also used : Path(java.nio.file.Path) Matchers.hasToString(org.hamcrest.Matchers.hasToString) Matchers.containsString(org.hamcrest.Matchers.containsString) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) InvalidPathException(java.nio.file.InvalidPathException) MissingHistoryOperationsException(org.opensearch.index.engine.MissingHistoryOperationsException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) EOFException(java.io.EOFException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicLong(java.util.concurrent.atomic.AtomicLong) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) LongSupplier(java.util.function.LongSupplier) Phaser(java.util.concurrent.Phaser) Location(org.opensearch.index.translog.Translog.Location)

Example 5 with Location

use of org.opensearch.index.translog.Translog.Location in project OpenSearch by opensearch-project.

the class TranslogTests method testConcurrentWriteViewsAndSnapshot.

/**
 * Tests that concurrent readers and writes maintain view and snapshot semantics.
 * <p>
 * Writer threads add operations of rotating types, record each operation with its
 * returned location, and periodically roll the generation and trim unreferenced readers
 * under a shared mutex. Reader threads periodically re-acquire a retention lock, remember
 * the last committed local checkpoint at that moment, and verify that a snapshot starting
 * just above that checkpoint contains every operation recorded so far with a higher
 * sequence number.
 *
 * @throws Throwable the first exception reported by a writer or reader through
 *                   {@code onFailure} (later ones attached as suppressed), or any failure
 *                   raised on the test thread itself
 */
public void testConcurrentWriteViewsAndSnapshot() throws Throwable {
    final Thread[] writers = new Thread[randomIntBetween(1, 3)];
    final Thread[] readers = new Thread[randomIntBetween(1, 3)];
    final int flushEveryOps = randomIntBetween(5, 100);
    final int maxOps = randomIntBetween(200, 1000);
    final Object signalReaderSomeDataWasIndexed = new Object();
    final AtomicLong idGenerator = new AtomicLong();
    // all writers, all readers and the test thread start together
    final CyclicBarrier barrier = new CyclicBarrier(writers.length + readers.length + 1);
    // a map of all written ops and their returned location.
    final Map<Translog.Operation, Translog.Location> writtenOps = ConcurrentCollections.newConcurrentMap();
    // a signal for all threads to stop
    final AtomicBoolean run = new AtomicBoolean(true);
    final Object flushMutex = new Object();
    final AtomicLong lastCommittedLocalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED);
    final LocalCheckpointTracker tracker = LocalCheckpointTrackerTests.createEmptyTracker();
    final TranslogDeletionPolicy deletionPolicy = translog.getDeletionPolicy();
    // any errors on threads
    final List<Exception> errors = new CopyOnWriteArrayList<>();
    logger.info("using [{}] readers. [{}] writers. flushing every ~[{}] ops.", readers.length, writers.length, flushEveryOps);
    for (int i = 0; i < writers.length; i++) {
        final String threadName = "writer_" + i;
        final int threadId = i;
        writers[i] = new Thread(new AbstractRunnable() {

            @Override
            public void doRun() throws BrokenBarrierException, InterruptedException, IOException {
                barrier.await();
                int counter = 0;
                final Translog.Operation.Type[] types = Translog.Operation.Type.values();
                while (run.get() && idGenerator.get() < maxOps) {
                    long id = idGenerator.getAndIncrement();
                    final Translog.Operation op;
                    // rotate through the operation types based on the generated id
                    final Translog.Operation.Type type = types[((int) (id % types.length))];
                    switch(type) {
                        case CREATE:
                        case INDEX:
                            op = new Translog.Index("type", "" + id, id, primaryTerm.get(), new byte[] { (byte) id });
                            break;
                        case DELETE:
                            op = new Translog.Delete("test", Long.toString(id), id, primaryTerm.get(), newUid(Long.toString(id)));
                            break;
                        case NO_OP:
                            op = new Translog.NoOp(id, 1, Long.toString(id));
                            break;
                        default:
                            throw new AssertionError("unsupported operation type [" + type + "]");
                    }
                    Translog.Location location = translog.add(op);
                    tracker.markSeqNoAsProcessed(id);
                    // Map.put returns the previously mapped location, or null if the op is new
                    Translog.Location existing = writtenOps.put(op, location);
                    if (existing != null) {
                        // report the location that was already recorded, not the one just written
                        fail("duplicate op [" + op + "], old entry at " + existing);
                    }
                    if (id % writers.length == threadId) {
                        translog.ensureSynced(location);
                    }
                    if (id % flushEveryOps == 0) {
                        synchronized (flushMutex) {
                            // we need not do this concurrently as we need to make sure that the generation
                            // we're committing - is still present when we're committing
                            long localCheckpoint = tracker.getProcessedCheckpoint();
                            translog.rollGeneration();
                            // expose the new checkpoint (simulating a commit), before we trim the translog
                            lastCommittedLocalCheckpoint.set(localCheckpoint);
                            deletionPolicy.setLocalCheckpointOfSafeCommit(localCheckpoint);
                            translog.trimUnreferencedReaders();
                        }
                    }
                    if (id % 7 == 0) {
                        // wake up readers waiting for more data
                        synchronized (signalReaderSomeDataWasIndexed) {
                            signalReaderSomeDataWasIndexed.notifyAll();
                        }
                    }
                    counter++;
                }
                logger.info("--> [{}] done. wrote [{}] ops.", threadName, counter);
            }

            @Override
            public void onFailure(Exception e) {
                logger.error(() -> new ParameterizedMessage("--> writer [{}] had an error", threadName), e);
                errors.add(e);
            }
        }, threadName);
        writers[i].start();
    }
    for (int i = 0; i < readers.length; i++) {
        final String threadId = "reader_" + i;
        readers[i] = new Thread(new AbstractRunnable() {

            // the retention lock currently held by this reader, if any
            Closeable retentionLock = null;

            // value of lastCommittedLocalCheckpoint captured right after the lock was acquired
            long committedLocalCheckpointAtView;

            @Override
            public void onFailure(Exception e) {
                logger.error(() -> new ParameterizedMessage("--> reader [{}] had an error", threadId), e);
                errors.add(e);
                try {
                    closeRetentionLock();
                } catch (IOException inner) {
                    inner.addSuppressed(e);
                    logger.error("unexpected error while closing view, after failure", inner);
                }
            }

            void closeRetentionLock() throws IOException {
                if (retentionLock != null) {
                    retentionLock.close();
                }
            }

            void acquireRetentionLock() throws IOException {
                closeRetentionLock();
                retentionLock = translog.acquireRetentionLock();
                // captures the last committed checkpoint, while holding the view, simulating
                // recovery logic which captures a view and gets a lucene commit
                committedLocalCheckpointAtView = lastCommittedLocalCheckpoint.get();
                logger.info("--> [{}] min gen after acquiring lock [{}]", threadId, translog.getMinFileGeneration());
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                int iter = 0;
                while (idGenerator.get() < maxOps) {
                    // refresh the retention lock on every tenth iteration
                    if (iter++ % 10 == 0) {
                        acquireRetentionLock();
                    }
                    // captures all ops that are written since the view was created (with a small caveat, see below)
                    // these are what we expect the snapshot to return (and potentially some more).
                    Set<Translog.Operation> expectedOps = new HashSet<>(writtenOps.keySet());
                    expectedOps.removeIf(op -> op.seqNo() <= committedLocalCheckpointAtView);
                    try (Translog.Snapshot snapshot = translog.newSnapshot(committedLocalCheckpointAtView + 1L, Long.MAX_VALUE)) {
                        Translog.Operation op;
                        while ((op = snapshot.next()) != null) {
                            expectedOps.remove(op);
                        }
                    }
                    // whatever is left was recorded as written but not returned by the snapshot
                    if (expectedOps.isEmpty() == false) {
                        StringBuilder missed = new StringBuilder("missed ").append(expectedOps.size()).append(" operations from [").append(committedLocalCheckpointAtView + 1L).append("]");
                        for (Translog.Operation expectedOp : expectedOps) {
                            final Translog.Location loc = writtenOps.get(expectedOp);
                            missed.append("\n --> [").append(expectedOp).append("] written at ").append(loc);
                        }
                        fail(missed.toString());
                    }
                    // slow down things a bit and spread out testing..
                    synchronized (signalReaderSomeDataWasIndexed) {
                        if (idGenerator.get() < maxOps) {
                            signalReaderSomeDataWasIndexed.wait();
                        }
                    }
                }
                closeRetentionLock();
                logger.info("--> [{}] done. tested [{}] snapshots", threadId, iter);
            }
        }, threadId);
        readers[i].start();
    }
    barrier.await();
    logger.debug("--> waiting for threads to stop");
    for (Thread thread : writers) {
        thread.join();
    }
    logger.debug("--> waiting for readers to stop");
    // force stopping, if all writers crashed
    synchronized (signalReaderSomeDataWasIndexed) {
        idGenerator.set(Long.MAX_VALUE);
        signalReaderSomeDataWasIndexed.notifyAll();
    }
    for (Thread thread : readers) {
        thread.join();
    }
    if (errors.size() > 0) {
        // rethrow the first recorded error and attach the others so none are lost
        Throwable e = errors.get(0);
        for (Throwable suppress : errors.subList(1, errors.size())) {
            e.addSuppressed(suppress);
        }
        throw e;
    }
    logger.info("--> test done. total ops written [{}]", writtenOps.size());
}
Also used : AbstractRunnable(org.opensearch.common.util.concurrent.AbstractRunnable) Closeable(java.io.Closeable) Matchers.hasToString(org.hamcrest.Matchers.hasToString) Matchers.containsString(org.hamcrest.Matchers.containsString) LocalCheckpointTracker(org.opensearch.index.seqno.LocalCheckpointTracker) Location(org.opensearch.index.translog.Translog.Location) HashSet(java.util.HashSet) IOException(java.io.IOException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) InvalidPathException(java.nio.file.InvalidPathException) MissingHistoryOperationsException(org.opensearch.index.engine.MissingHistoryOperationsException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) EOFException(java.io.EOFException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) CyclicBarrier(java.util.concurrent.CyclicBarrier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) TranslogDeletionPolicies.createTranslogDeletionPolicy(org.opensearch.index.translog.TranslogDeletionPolicies.createTranslogDeletionPolicy) Location(org.opensearch.index.translog.Translog.Location) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Aggregations

Location (org.opensearch.index.translog.Translog.Location)7 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)5 ArrayList (java.util.ArrayList)4 EOFException (java.io.EOFException)2 IOException (java.io.IOException)2 FileAlreadyExistsException (java.nio.file.FileAlreadyExistsException)2 InvalidPathException (java.nio.file.InvalidPathException)2 Path (java.nio.file.Path)2 BrokenBarrierException (java.util.concurrent.BrokenBarrierException)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException)2 AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException)2 Matchers.containsString (org.hamcrest.Matchers.containsString)2 Matchers.hasToString (org.hamcrest.Matchers.hasToString)2 MissingHistoryOperationsException (org.opensearch.index.engine.MissingHistoryOperationsException)2 Closeable (java.io.Closeable)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 CyclicBarrier (java.util.concurrent.CyclicBarrier)1