
Example 1 with WriteEntry

use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.

In class HRegion, method internalPrepareFlushCache.

@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DLS_DEAD_LOCAL_STORE", justification = "FindBugs seems confused about trxId")
protected PrepareFlushResult internalPrepareFlushCache(final WAL wal, final long myseqid, final Collection<Store> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker) throws IOException {
    if (this.rsServices != null && this.rsServices.isAborted()) {
        // Don't flush when server aborting, it's unsafe
        throw new IOException("Aborting flush because server is aborted...");
    }
    final long startTime = EnvironmentEdgeManager.currentTime();
    // If there is nothing in the memstore to flush, return early, but first obtain a valid
    // flush sequence id; the mvcc write entry taken below is the way to go get one.
    if (this.memstoreDataSize.get() <= 0) {
        // Take an update lock so no edits can come into memory just yet.
        this.updatesLock.writeLock().lock();
        WriteEntry writeEntry = null;
        try {
            if (this.memstoreDataSize.get() <= 0) {
                // Presume that if there are still no edits in the memstore, then there are no edits for
                // this region out in the WAL subsystem so no need to do any trickery clearing out
                // edits in the WAL sub-system. Up the sequence number so the resulting flush id is for
                // sure just beyond the last appended region edit and not associated with any edit
                // (useful as marker when bulk loading, etc.).
                FlushResult flushResult = null;
                if (wal != null) {
                    writeEntry = mvcc.begin();
                    long flushOpSeqId = writeEntry.getWriteNumber();
                    flushResult = new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, flushOpSeqId, "Nothing to flush", writeFlushRequestMarkerToWAL(wal, writeFlushWalMarker));
                    mvcc.completeAndWait(writeEntry);
                    // Set to null so we don't complete it again down in finally block.
                    writeEntry = null;
                    return new PrepareFlushResult(flushResult, myseqid);
                } else {
                    return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, "Nothing to flush", false), myseqid);
                }
            }
        } finally {
            if (writeEntry != null) {
                // If writeEntry is non-null, this operation failed and the mvcc transaction with
                // it; complete the entry anyway so it does not block the mvcc queue.
                mvcc.complete(writeEntry);
            }
            this.updatesLock.writeLock().unlock();
        }
    }
    logFatLineOnFlush(storesToFlush, myseqid);
    // Stop updates while we snapshot the memstore of all of these regions' stores. We only have
    // to do this for a moment; it is quick. We also set the memstore size to zero here before we
    // allow updates again, so its value will represent the size of the updates received during
    // the flush.
    // We have to take an update lock during the snapshot, or else a write could end up in both the
    // snapshot and the memstore (which makes it difficult to do atomic rows then).
    status.setStatus("Obtaining lock to block concurrent updates");
    // block waiting for the lock for internal flush
    this.updatesLock.writeLock().lock();
    status.setStatus("Preparing flush snapshotting stores in " + getRegionInfo().getEncodedName());
    MemstoreSize totalSizeOfFlushableStores = new MemstoreSize();
    Map<byte[], Long> flushedFamilyNamesToSeq = new HashMap<>();
    for (Store store : storesToFlush) {
        flushedFamilyNamesToSeq.put(store.getFamily().getName(), ((HStore) store).preFlushSeqIDEstimation());
    }
    TreeMap<byte[], StoreFlushContext> storeFlushCtxs = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    TreeMap<byte[], List<Path>> committedFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    TreeMap<byte[], MemstoreSize> storeFlushableSize = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    // The sequence id of this flush operation which is used to log FlushMarker and pass to
    // createFlushContext to use as the store file's sequence id. It can be in advance of edits
    // still in the memstore, edits that are in other column families yet to be flushed.
    long flushOpSeqId = HConstants.NO_SEQNUM;
    // The max flushed sequence id after this flush operation completes. All edits in memstore
    // will be in advance of this sequence id.
    long flushedSeqId = HConstants.NO_SEQNUM;
    byte[] encodedRegionName = getRegionInfo().getEncodedNameAsBytes();
    try {
        if (wal != null) {
            Long earliestUnflushedSequenceIdForTheRegion = wal.startCacheFlush(encodedRegionName, flushedFamilyNamesToSeq);
            if (earliestUnflushedSequenceIdForTheRegion == null) {
                // This should never happen. This is how startCacheFlush signals flush cannot proceed.
                String msg = this.getRegionInfo().getEncodedName() + " flush aborted; WAL closing.";
                status.setStatus(msg);
                return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false), myseqid);
            }
            flushOpSeqId = getNextSequenceId(wal);
            // Back up by one: subtract 1 from the oldest sequence id still in the memstore to get
            // the last 'flushed' edit.
            flushedSeqId = earliestUnflushedSequenceIdForTheRegion.longValue() == HConstants.NO_SEQNUM ? flushOpSeqId : earliestUnflushedSequenceIdForTheRegion.longValue() - 1;
        } else {
            // Use the provided sequence id, as the WAL is not being used for this flush.
            flushedSeqId = flushOpSeqId = myseqid;
        }
        for (Store s : storesToFlush) {
            MemstoreSize flushableSize = s.getSizeToFlush();
            totalSizeOfFlushableStores.incMemstoreSize(flushableSize);
            storeFlushCtxs.put(s.getFamily().getName(), s.createFlushContext(flushOpSeqId));
            // for writing stores to WAL
            committedFiles.put(s.getFamily().getName(), null);
            storeFlushableSize.put(s.getFamily().getName(), flushableSize);
        }
        // write the snapshot start to WAL
        if (wal != null && !writestate.readOnly) {
            FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
            // No sync here. The sync happens later, after the updates lock is released, where we
            // write FlushAction.COMMIT_FLUSH.
            WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, false, mvcc);
        }
        // Prepare flush (take a snapshot)
        for (StoreFlushContext flush : storeFlushCtxs.values()) {
            flush.prepare();
        }
    } catch (IOException ex) {
        doAbortFlushToWAL(wal, flushOpSeqId, committedFiles);
        throw ex;
    } finally {
        this.updatesLock.writeLock().unlock();
    }
    String s = "Finished memstore snapshotting " + this + ", syncing WAL and waiting on mvcc, " + "flushsize=" + totalSizeOfFlushableStores;
    status.setStatus(s);
    doSyncOfUnflushedWALChanges(wal, getRegionInfo());
    return new PrepareFlushResult(storeFlushCtxs, committedFiles, storeFlushableSize, startTime, flushOpSeqId, flushedSeqId, totalSizeOfFlushableStores);
}
Also used : WriteEntry(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) TreeMap(java.util.TreeMap) FlushDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor) StoreFlushDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor) AtomicLong(java.util.concurrent.atomic.AtomicLong) ArrayList(java.util.ArrayList) AbstractList(java.util.AbstractList) List(java.util.List)
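
The recurring shape in this example is the WriteEntry lifecycle: begin() hands out the next write number, completeAndWait(entry) marks the transaction complete and waits for the global read point to catch up to it, and the finally block completes any abandoned entry so it cannot stall the queue. Below is a minimal, self-contained sketch of that lifecycle; MiniMvcc is a toy class of my own, not HBase's MultiVersionConcurrencyControl, and it models only the queue-and-read-point behaviour the example relies on.

import java.util.ArrayDeque;
import java.util.Deque;

// Toy model of the mvcc write-entry lifecycle: begin() queues an entry carrying the
// next write number, complete() marks it done, and the read point only advances past
// an entry once every earlier entry has also completed.
final class MiniMvcc {

    static final class WriteEntry {
        private final long writeNumber;
        private boolean completed;

        WriteEntry(long writeNumber) {
            this.writeNumber = writeNumber;
        }

        long getWriteNumber() {
            return writeNumber;
        }
    }

    private final Deque<WriteEntry> writeQueue = new ArrayDeque<>();
    private long nextWriteNumber = 1;
    private long readPoint = 0;

    synchronized WriteEntry begin() {
        WriteEntry entry = new WriteEntry(nextWriteNumber++);
        writeQueue.addLast(entry);
        return entry;
    }

    // Mark the entry complete; the read point advances over the longest completed prefix.
    synchronized void complete(WriteEntry entry) {
        entry.completed = true;
        while (!writeQueue.isEmpty() && writeQueue.peekFirst().completed) {
            readPoint = writeQueue.removeFirst().getWriteNumber();
        }
        notifyAll();
    }

    // Like complete(), but also wait until the read point has reached this entry.
    synchronized void completeAndWait(WriteEntry entry) {
        complete(entry);
        while (readPoint < entry.getWriteNumber()) {
            try {
                wait();
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
}

The empty-memstore branch above then reduces to this idiom (a method you could drop into any class alongside MiniMvcc): null the entry out once it is completed, so the finally block only ever completes entries that were abandoned partway through.

// Hands back a sequence id usable as a flush id for an empty memstore, mirroring
// the wal != null branch of internalPrepareFlushCache.
static long emptyFlushSequenceId(MiniMvcc mvcc) {
    MiniMvcc.WriteEntry writeEntry = null;
    try {
        writeEntry = mvcc.begin();
        long flushOpSeqId = writeEntry.getWriteNumber(); // stands in for the flush sequence id
        mvcc.completeAndWait(writeEntry);
        writeEntry = null; // completed; nothing left for the finally block to do
        return flushOpSeqId;
    } finally {
        if (writeEntry != null) {
            mvcc.complete(writeEntry); // failure path: unblock the mvcc queue
        }
    }
}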

Example 2 with WriteEntry

use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.

In class HRegion, method doDelta.

/**
   * Add "deltas" to Cells. Deltas are increments or appends. Switch on <code>op</code>.
   *
   * <p>If increment, add deltas to current values or if an append, then
   * append the deltas to the current Cell values.
   *
   * <p>Append and Increment code paths are mostly the same. They differ in just a few places.
   * This method does the code path for increment and append and then in key spots, switches
   * on the passed in <code>op</code> to do increment or append specific paths.
   */
private Result doDelta(Operation op, Mutation mutation, long nonceGroup, long nonce, boolean returnResults) throws IOException {
    checkReadOnly();
    checkResources();
    checkRow(mutation.getRow(), op.toString());
    checkFamilies(mutation.getFamilyCellMap().keySet());
    this.writeRequestsCount.increment();
    WriteEntry writeEntry = null;
    startRegionOperation(op);
    List<Cell> results = returnResults ? new ArrayList<>(mutation.size()) : null;
    RowLock rowLock = null;
    MemstoreSize memstoreSize = new MemstoreSize();
    try {
        rowLock = getRowLockInternal(mutation.getRow(), false);
        lock(this.updatesLock.readLock());
        try {
            Result cpResult = doCoprocessorPreCall(op, mutation);
            if (cpResult != null) {
                return returnResults ? cpResult : null;
            }
            Durability effectiveDurability = getEffectiveDurability(mutation.getDurability());
            Map<Store, List<Cell>> forMemStore = new HashMap<>(mutation.getFamilyCellMap().size());
            // Reckon the Cells to apply to the WAL (in the returned walEdit), what to add to the
            // memstore, and what to return to the client (in 'forMemStore' and 'results'
            // respectively).
            WALEdit walEdit = reckonDeltas(op, mutation, effectiveDurability, forMemStore, results);
            // Actually write to the WAL now, if there is a walEdit to apply.
            if (walEdit != null && !walEdit.isEmpty()) {
                writeEntry = doWALAppend(walEdit, effectiveDurability, nonceGroup, nonce);
            } else {
                // If walEdit is empty, we skipped the WAL; update the LongAdders and start an
                // mvcc transaction ourselves.
                recordMutationWithoutWal(mutation.getFamilyCellMap());
                writeEntry = mvcc.begin();
                updateSequenceId(forMemStore.values(), writeEntry.getWriteNumber());
            }
            // Now write to MemStore. Do it a column family at a time.
            for (Map.Entry<Store, List<Cell>> e : forMemStore.entrySet()) {
                applyToMemstore(e.getKey(), e.getValue(), true, memstoreSize);
            }
            mvcc.completeAndWait(writeEntry);
            if (rsServices != null && rsServices.getNonceManager() != null) {
                rsServices.getNonceManager().addMvccToOperationContext(nonceGroup, nonce, writeEntry.getWriteNumber());
            }
            writeEntry = null;
        } finally {
            this.updatesLock.readLock().unlock();
        }
        // If results is null, then client asked that we not return the calculated results.
        return results != null && returnResults ? Result.create(results) : Result.EMPTY_RESULT;
    } finally {
        // If writeEntry is still non-null here, the operation failed before completing the mvcc
        // transaction (possible even for something as trivial as a 0 increment); complete it so
        // it does not block the mvcc queue.
        if (writeEntry != null)
            mvcc.complete(writeEntry);
        if (rowLock != null) {
            rowLock.release();
        }
        // Request a cache flush if over the limit.  Do it outside update lock.
        if (isFlushSize(addAndGetMemstoreSize(memstoreSize))) {
            requestFlush();
        }
        closeRegionOperation(op);
        if (this.metricsRegion != null) {
            switch(op) {
                case INCREMENT:
                    this.metricsRegion.updateIncrement();
                    break;
                case APPEND:
                    this.metricsRegion.updateAppend();
                    break;
                default:
                    break;
            }
        }
    }
}
Also used : WriteEntry(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Durability(org.apache.hadoop.hbase.client.Durability) Result(org.apache.hadoop.hbase.client.Result) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) ArrayList(java.util.ArrayList) AbstractList(java.util.AbstractList) List(java.util.List) Cell(org.apache.hadoop.hbase.Cell) Map(java.util.Map) TreeMap(java.util.TreeMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap)
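
The branch worth studying here is how doDelta obtains its WriteEntry: when the edit is written to the WAL, the append starts the mvcc transaction and the returned entry carries the WAL-assigned sequence id; when the WAL is skipped, the caller must begin the transaction itself and stamp the cells by hand. A compact sketch of that branching, reusing the toy MiniMvcc from Example 1's sketch (ToyCell and appendToWal are stand-ins of mine, not HBase API):

import java.util.List;

final class DeltaSketch {

    static final class ToyCell {
        final String value;
        long sequenceId; // zero until stamped, like a freshly built HBase cell
        ToyCell(String value) {
            this.value = value;
        }
    }

    private final MiniMvcc mvcc = new MiniMvcc();

    // Toy WAL append: in HBase the real append starts the mvcc transaction and
    // stamps the cells with the assigned sequence id on the caller's behalf.
    private MiniMvcc.WriteEntry appendToWal(List<ToyCell> edit) {
        MiniMvcc.WriteEntry entry = mvcc.begin();
        for (ToyCell cell : edit) {
            cell.sequenceId = entry.getWriteNumber();
        }
        return entry;
    }

    void applyDelta(List<ToyCell> cells, boolean skipWal) {
        MiniMvcc.WriteEntry writeEntry = null;
        try {
            if (!skipWal) {
                writeEntry = appendToWal(cells); // WAL path: the entry comes from the append
            } else {
                writeEntry = mvcc.begin();       // skip-WAL path: begin it ourselves...
                for (ToyCell cell : cells) {
                    cell.sequenceId = writeEntry.getWriteNumber(); // ...and stamp by hand
                }
            }
            // ... apply the cells to the memstore here ...
            mvcc.completeAndWait(writeEntry);
            writeEntry = null; // success; nothing left for the finally block
        } finally {
            if (writeEntry != null) {
                mvcc.complete(writeEntry); // failed partway: don't block the mvcc queue
            }
        }
    }
}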

Example 3 with WriteEntry

use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.

In class WALUtil, method doFullMarkerAppendTransaction.

/**
 * A 'full' WAL transaction involves starting an mvcc transaction followed by an append, an
 * optional sync, and then a call to complete the mvcc transaction. This method does it all. Good
 * for the case of adding a single edit or marker to the WAL.
 * <p/>
 * This write is for internal use only, not for external client consumption.
 * @return the WALKeyImpl that was added to the WAL.
 */
private static WALKeyImpl doFullMarkerAppendTransaction(final WAL wal, final NavigableMap<byte[], Integer> replicationScope, final RegionInfo hri, final WALEdit edit, final MultiVersionConcurrencyControl mvcc, final Map<String, byte[]> extendedAttributes, final boolean sync, final RegionReplicationSink sink) throws IOException {
    // TODO: Pass in current time to use?
    WALKeyImpl walKey = new WALKeyImpl(hri.getEncodedNameAsBytes(), hri.getTable(), EnvironmentEdgeManager.currentTime(), mvcc, replicationScope, extendedAttributes);
    long trx = MultiVersionConcurrencyControl.NONE;
    try {
        trx = wal.appendMarker(hri, walKey, edit);
        WriteEntry writeEntry = walKey.getWriteEntry();
        if (sink != null) {
            writeEntry.attachCompletionAction(() -> sink.add(walKey, edit, RpcServer.getCurrentServerCallWithCellScanner().orElse(null)));
        }
        if (sync) {
            wal.sync(trx);
        }
        // Call complete only here because these are markers only. They are not for clients to read.
        mvcc.complete(writeEntry);
    } catch (IOException ioe) {
        if (walKey.getWriteEntry() != null) {
            mvcc.complete(walKey.getWriteEntry());
        }
        throw ioe;
    }
    return walKey;
}
Also used : WriteEntry(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry) WALKeyImpl(org.apache.hadoop.hbase.wal.WALKeyImpl) IOException(java.io.IOException)
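
The shape to copy from this example is the error handling: whether or not the append and sync succeed, the mvcc entry must be completed, or every later transaction queues up behind it. Below is a sketch of that 'full transaction' shape against the toy MiniMvcc above; appendMarker and syncWal are hypothetical stand-ins for wal.appendMarker and wal.sync, not real HBase calls.

import java.io.IOException;

final class MarkerTransactionSketch {

    // Hypothetical stand-in for wal.appendMarker(hri, walKey, edit).
    private static void appendMarker() throws IOException {
        // the real append may fail with an IOException
    }

    // Hypothetical stand-in for wal.sync(trx).
    private static void syncWal() throws IOException {
    }

    static void fullMarkerTransaction(MiniMvcc mvcc, boolean sync) throws IOException {
        // In HBase the append creates the WriteEntry via the WALKey; begin() models that here.
        MiniMvcc.WriteEntry writeEntry = mvcc.begin();
        try {
            appendMarker();
            if (sync) {
                syncWal();
            }
            // Markers are internal-only and never read by clients, so complete right here.
            mvcc.complete(writeEntry);
        } catch (IOException ioe) {
            mvcc.complete(writeEntry); // never leave the entry open on the error path
            throw ioe;
        }
    }
}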

Example 4 with WriteEntry

use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.

In class HRegion, method processRowsWithLocks.

@Override
public void processRowsWithLocks(RowProcessor<?, ?> processor, long timeout, long nonceGroup, long nonce) throws IOException {
    for (byte[] row : processor.getRowsToLock()) {
        checkRow(row, "processRowsWithLocks");
    }
    if (!processor.readOnly()) {
        checkReadOnly();
    }
    checkResources();
    startRegionOperation();
    WALEdit walEdit = new WALEdit();
    // STEP 1. Run pre-process hook
    preProcess(processor, walEdit);
    // Short circuit the read only case
    if (processor.readOnly()) {
        try {
            long now = EnvironmentEdgeManager.currentTime();
            doProcessRowWithTimeout(processor, now, this, null, null, timeout);
            processor.postProcess(this, walEdit, true);
        } finally {
            closeRegionOperation();
        }
        return;
    }
    boolean locked = false;
    List<RowLock> acquiredRowLocks = null;
    List<Mutation> mutations = new ArrayList<>();
    Collection<byte[]> rowsToLock = processor.getRowsToLock();
    // This is assigned by mvcc either explicitly in the below or in the guts of the WAL append
    // when it assigns the edit a sequence id (a.k.a. the mvcc write number).
    WriteEntry writeEntry = null;
    MemstoreSize memstoreSize = new MemstoreSize();
    try {
        boolean success = false;
        try {
            // STEP 2. Acquire the row lock(s)
            acquiredRowLocks = new ArrayList<>(rowsToLock.size());
            for (byte[] row : rowsToLock) {
                // Attempt to lock all involved rows, throw if any lock times out
                // use a writer lock for mixed reads and writes
                acquiredRowLocks.add(getRowLockInternal(row, false));
            }
            // STEP 3. Region lock
            lock(this.updatesLock.readLock(), acquiredRowLocks.isEmpty() ? 1 : acquiredRowLocks.size());
            locked = true;
            long now = EnvironmentEdgeManager.currentTime();
            // STEP 4. Let the processor scan the rows, generate mutations, and add WALEdits
            doProcessRowWithTimeout(processor, now, this, mutations, walEdit, timeout);
            if (!mutations.isEmpty()) {
                // STEP 5. Call the preBatchMutate hook
                processor.preBatchMutate(this, walEdit);
                // STEP 6. Append and sync if walEdit has data to write out.
                if (!walEdit.isEmpty()) {
                    writeEntry = doWALAppend(walEdit, getEffectiveDurability(processor.useDurability()), processor.getClusterIds(), now, nonceGroup, nonce);
                } else {
                    // We are here if WAL is being skipped.
                    writeEntry = this.mvcc.begin();
                }
                // STEP 7. Apply to memstore
                long sequenceId = writeEntry.getWriteNumber();
                for (Mutation m : mutations) {
                    // Handle any tag based cell features.
                    // TODO: Do we need to call rewriteCellTags down in applyToMemstore()? Why not before
                    // so tags go into WAL?
                    rewriteCellTags(m.getFamilyCellMap(), m);
                    for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance(); ) {
                        Cell cell = cellScanner.current();
                        if (walEdit.isEmpty()) {
                            // If walEdit is empty, we put nothing in WAL. WAL stamps Cells with sequence id.
                            // If no WAL, need to stamp it here.
                            CellUtil.setSequenceId(cell, sequenceId);
                        }
                        applyToMemstore(getHStore(cell), cell, memstoreSize);
                    }
                }
                // STEP 8. call postBatchMutate hook
                processor.postBatchMutate(this);
                // STEP 9. Complete mvcc.
                mvcc.completeAndWait(writeEntry);
                writeEntry = null;
                // STEP 10. Release region lock
                if (locked) {
                    this.updatesLock.readLock().unlock();
                    locked = false;
                }
                // STEP 11. Release row lock(s)
                releaseRowLocks(acquiredRowLocks);
            }
            success = true;
        } finally {
            // Call complete rather than completeAndWait because we probably had an error if
            // writeEntry is still non-null here
            if (writeEntry != null)
                mvcc.complete(writeEntry);
            if (locked) {
                this.updatesLock.readLock().unlock();
            }
            // release locks if some were acquired but another timed out
            releaseRowLocks(acquiredRowLocks);
        }
        // 12. Run post-process hook
        processor.postProcess(this, walEdit, success);
    } finally {
        closeRegionOperation();
        if (!mutations.isEmpty()) {
            long newSize = this.addAndGetMemstoreSize(memstoreSize);
            requestFlushIfNeeded(newSize);
        }
    }
}
Also used : WriteEntry(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry) ArrayList(java.util.ArrayList) CellScanner(org.apache.hadoop.hbase.CellScanner) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Mutation(org.apache.hadoop.hbase.client.Mutation) Cell(org.apache.hadoop.hbase.Cell)
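
Beyond mvcc, this example is mostly about lock discipline: row locks first, then the shared region updatesLock, releases in reverse order on success, and a finally block that cleans up whatever a failed path left behind. Here is a self-contained sketch of that discipline using plain java.util.concurrent locks as stand-ins for HBase's RowLock and updatesLock (toy code, not HBase API):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

final class RowProcessingSketch {

    private final ReadWriteLock updatesLock = new ReentrantReadWriteLock();

    void processRows(List<Lock> rowLocks) {
        List<Lock> acquired = new ArrayList<>(rowLocks.size());
        boolean locked = false;
        try {
            for (Lock rowLock : rowLocks) {      // STEP 1: row locks first
                rowLock.lock();
                acquired.add(rowLock);
            }
            updatesLock.readLock().lock();       // STEP 2: then the shared region lock
            locked = true;

            // ... scan rows, build mutations, append to the WAL, apply to the memstore ...

            updatesLock.readLock().unlock();     // STEP 3: release early on success,
            locked = false;                      // in reverse acquisition order
            releaseAll(acquired);
        } finally {
            if (locked) {
                updatesLock.readLock().unlock(); // failure path only
            }
            releaseAll(acquired);                // no-op after a successful release above
        }
    }

    // Unlock and clear, so a second call (from the finally block) does nothing;
    // unlocking a lock that is not held would throw IllegalMonitorStateException.
    private static void releaseAll(List<Lock> locks) {
        for (Lock lock : locks) {
            lock.unlock();
        }
        locks.clear();
    }
}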

Example 5 with WriteEntry

use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.

In class HRegion, method internalPrepareFlushCache.

@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DLS_DEAD_LOCAL_STORE", justification = "FindBugs seems confused about trxId")
protected PrepareFlushResult internalPrepareFlushCache(WAL wal, long myseqid, Collection<HStore> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker, FlushLifeCycleTracker tracker) throws IOException {
    if (this.rsServices != null && this.rsServices.isAborted()) {
        // Don't flush when server aborting, it's unsafe
        throw new IOException("Aborting flush because server is aborted...");
    }
    final long startTime = EnvironmentEdgeManager.currentTime();
    // If there is nothing in the memstore to flush, return early, but first obtain a valid
    // flush sequence id; the mvcc write entry taken below is the way to go get one.
    if (this.memStoreSizing.getDataSize() <= 0) {
        // Take an update lock so no edits can come into memory just yet.
        this.updatesLock.writeLock().lock();
        WriteEntry writeEntry = null;
        try {
            if (this.memStoreSizing.getDataSize() <= 0) {
                // Presume that if there are still no edits in the memstore, then there are no edits for
                // this region out in the WAL subsystem so no need to do any trickery clearing out
                // edits in the WAL sub-system. Up the sequence number so the resulting flush id is for
                // sure just beyond the last appended region edit and not associated with any edit
                // (useful as marker when bulk loading, etc.).
                if (wal != null) {
                    writeEntry = mvcc.begin();
                    long flushOpSeqId = writeEntry.getWriteNumber();
                    FlushResultImpl flushResult = new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, flushOpSeqId, "Nothing to flush", writeFlushRequestMarkerToWAL(wal, writeFlushWalMarker));
                    mvcc.completeAndWait(writeEntry);
                    // Set to null so we don't complete it again down in finally block.
                    writeEntry = null;
                    return new PrepareFlushResult(flushResult, myseqid);
                } else {
                    return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, "Nothing to flush", false), myseqid);
                }
            }
        } finally {
            if (writeEntry != null) {
                // If writeEntry is non-null, this operation failed and the mvcc transaction with
                // it; complete the entry anyway so it does not block the mvcc queue.
                mvcc.complete(writeEntry);
            }
            this.updatesLock.writeLock().unlock();
        }
    }
    logFatLineOnFlush(storesToFlush, myseqid);
    // Stop updates while we snapshot the memstore of all of these regions' stores. We only have
    // to do this for a moment; it is quick. We also set the memstore size to zero here before we
    // allow updates again, so its value will represent the size of the updates received during
    // the flush.
    // We have to take an update lock during the snapshot, or else a write could end up in both the
    // snapshot and the memstore (which makes it difficult to do atomic rows then).
    status.setStatus("Obtaining lock to block concurrent updates");
    // block waiting for the lock for internal flush
    this.updatesLock.writeLock().lock();
    status.setStatus("Preparing flush snapshotting stores in " + getRegionInfo().getEncodedName());
    MemStoreSizing totalSizeOfFlushableStores = new NonThreadSafeMemStoreSizing();
    Map<byte[], Long> flushedFamilyNamesToSeq = new HashMap<>();
    for (HStore store : storesToFlush) {
        flushedFamilyNamesToSeq.put(store.getColumnFamilyDescriptor().getName(), store.preFlushSeqIDEstimation());
    }
    TreeMap<byte[], StoreFlushContext> storeFlushCtxs = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    TreeMap<byte[], List<Path>> committedFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    TreeMap<byte[], MemStoreSize> storeFlushableSize = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    // The sequence id of this flush operation which is used to log FlushMarker and pass to
    // createFlushContext to use as the store file's sequence id. It can be in advance of edits
    // still in the memstore, edits that are in other column families yet to be flushed.
    long flushOpSeqId = HConstants.NO_SEQNUM;
    // The max flushed sequence id after this flush operation completes. All edits in memstore
    // will be in advance of this sequence id.
    long flushedSeqId = HConstants.NO_SEQNUM;
    byte[] encodedRegionName = getRegionInfo().getEncodedNameAsBytes();
    try {
        if (wal != null) {
            Long earliestUnflushedSequenceIdForTheRegion = wal.startCacheFlush(encodedRegionName, flushedFamilyNamesToSeq);
            if (earliestUnflushedSequenceIdForTheRegion == null) {
                // This should never happen. This is how startCacheFlush signals flush cannot proceed.
                String msg = this.getRegionInfo().getEncodedName() + " flush aborted; WAL closing.";
                status.setStatus(msg);
                return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false), myseqid);
            }
            flushOpSeqId = getNextSequenceId(wal);
            // Back up by one: subtract 1 from the oldest sequence id still in the memstore to get
            // the last 'flushed' edit.
            flushedSeqId = earliestUnflushedSequenceIdForTheRegion.longValue() == HConstants.NO_SEQNUM ? flushOpSeqId : earliestUnflushedSequenceIdForTheRegion.longValue() - 1;
        } else {
            // Use the provided sequence id, as the WAL is not being used for this flush.
            flushedSeqId = flushOpSeqId = myseqid;
        }
        for (HStore s : storesToFlush) {
            storeFlushCtxs.put(s.getColumnFamilyDescriptor().getName(), s.createFlushContext(flushOpSeqId, tracker));
            // for writing stores to WAL
            committedFiles.put(s.getColumnFamilyDescriptor().getName(), null);
        }
        // write the snapshot start to WAL
        if (wal != null && !writestate.readOnly) {
            FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
            // No sync here. The sync happens later, after the updates lock is released, where we
            // write FlushAction.COMMIT_FLUSH.
            WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, false, mvcc, regionReplicationSink.orElse(null));
        }
        // Prepare flush (take a snapshot)
        storeFlushCtxs.forEach((name, flush) -> {
            MemStoreSize snapshotSize = flush.prepare();
            totalSizeOfFlushableStores.incMemStoreSize(snapshotSize);
            storeFlushableSize.put(name, snapshotSize);
        });
    } catch (IOException ex) {
        doAbortFlushToWAL(wal, flushOpSeqId, committedFiles);
        throw ex;
    } finally {
        this.updatesLock.writeLock().unlock();
    }
    String s = "Finished memstore snapshotting " + this + ", syncing WAL and waiting on mvcc, " + "flushsize=" + totalSizeOfFlushableStores;
    status.setStatus(s);
    doSyncOfUnflushedWALChanges(wal, getRegionInfo());
    return new PrepareFlushResult(storeFlushCtxs, committedFiles, storeFlushableSize, startTime, flushOpSeqId, flushedSeqId, totalSizeOfFlushableStores);
}
Also used : WriteEntry(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) InterruptedIOException(java.io.InterruptedIOException) TreeMap(java.util.TreeMap) FlushDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor) StoreFlushDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor) ArrayList(java.util.ArrayList) List(java.util.List)
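
Worth noting against Example 1: the flushable sizes are no longer sampled up front with getSizeToFlush(); prepare() itself reports what was actually snapshotted, and the per-store sizes plus the running total are recorded while the contexts are prepared. A tiny runnable sketch of that pattern (ToyStoreFlushContext is mine, not HBase's StoreFlushContext):

import java.util.LinkedHashMap;
import java.util.Map;

final class PrepareSizesSketch {

    // Toy flush context whose prepare() reports the size it actually snapshotted.
    interface ToyStoreFlushContext {
        long prepare();
    }

    public static void main(String[] args) {
        Map<String, ToyStoreFlushContext> storeFlushCtxs = new LinkedHashMap<>();
        storeFlushCtxs.put("cf1", () -> 128L);
        storeFlushCtxs.put("cf2", () -> 512L);

        Map<String, Long> storeFlushableSize = new LinkedHashMap<>();
        long[] total = { 0L }; // effectively-final holder so the lambda can add to it
        storeFlushCtxs.forEach((name, flush) -> {
            long snapshotSize = flush.prepare(); // the size comes from the snapshot itself
            total[0] += snapshotSize;
            storeFlushableSize.put(name, snapshotSize);
        });
        System.out.println("total=" + total[0] + " perStore=" + storeFlushableSize);
    }
}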

Aggregations

WriteEntry (org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry) 8
IOException (java.io.IOException) 4
ArrayList (java.util.ArrayList) 4
InterruptedIOException (java.io.InterruptedIOException) 3
HashMap (java.util.HashMap) 3
List (java.util.List) 3
TreeMap (java.util.TreeMap) 3
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 3
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException) 3
TimeoutIOException (org.apache.hadoop.hbase.exceptions.TimeoutIOException) 3
AbstractList (java.util.AbstractList) 2
Cell (org.apache.hadoop.hbase.Cell) 2
Mutation (org.apache.hadoop.hbase.client.Mutation) 2
WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit) 2
FlushDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor) 2
StoreFlushDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor) 2
WALKeyImpl (org.apache.hadoop.hbase.wal.WALKeyImpl) 2
Map (java.util.Map) 1
NavigableMap (java.util.NavigableMap) 1
ConcurrentMap (java.util.concurrent.ConcurrentMap) 1