Search in sources :

Example 16 with WALKey

use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.

the class RSRpcServices method replay.

  /**
   * Replay the given changes when distributedLogReplay WAL edits from a failed RS. The guarantee is
   * that the given mutations will be durable on the receiving RS if this method returns without any
   * exception.
   * @param controller the RPC controller
   * @param request the request
   * @throws ServiceException if an IOException is raised while replaying the entries
   */
@QosPriority(priority = HConstants.REPLAY_QOS)
public ReplicateWALEntryResponse replay(final RpcController controller, final ReplicateWALEntryRequest request) throws ServiceException {
    long before = EnvironmentEdgeManager.currentTime();
    CellScanner cells = ((HBaseRpcController) controller).cellScanner();
    try {
        List<WALEntry> entries = request.getEntryList();
        if (entries == null || entries.isEmpty()) {
            // empty input
            return ReplicateWALEntryResponse.newBuilder().build();
        ByteString regionName = entries.get(0).getKey().getEncodedRegionName();
        Region region = regionServer.getRegionByEncodedName(regionName.toStringUtf8());
        RegionCoprocessorHost coprocessorHost = ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo()) ? region.getCoprocessorHost() : // do not invoke coprocessors if this is a secondary region replica
        List<Pair<WALKey, WALEdit>> walEntries = new ArrayList<>();
        // Skip adding the edits to WAL if this is a secondary region replica
        boolean isPrimary = RegionReplicaUtil.isDefaultReplica(region.getRegionInfo());
        Durability durability = isPrimary ? Durability.USE_DEFAULT : Durability.SKIP_WAL;
        for (WALEntry entry : entries) {
            if (!regionName.equals(entry.getKey().getEncodedRegionName())) {
                throw new NotServingRegionException("Replay request contains entries from multiple " + "regions. First region:" + regionName.toStringUtf8() + " , other region:" + entry.getKey().getEncodedRegionName());
            if (regionServer.nonceManager != null && isPrimary) {
                long nonceGroup = entry.getKey().hasNonceGroup() ? entry.getKey().getNonceGroup() : HConstants.NO_NONCE;
                long nonce = entry.getKey().hasNonce() ? entry.getKey().getNonce() : HConstants.NO_NONCE;
                regionServer.nonceManager.reportOperationFromWal(nonceGroup, nonce, entry.getKey().getWriteTime());
            Pair<WALKey, WALEdit> walEntry = (coprocessorHost == null) ? null : new Pair<>();
            List<WALSplitter.MutationReplay> edits = WALSplitter.getMutationsFromWALEntry(entry, cells, walEntry, durability);
            if (coprocessorHost != null) {
                // KeyValue.
                if (coprocessorHost.preWALRestore(region.getRegionInfo(), walEntry.getFirst(), walEntry.getSecond())) {
                    // if bypass this log entry, ignore it ...
            if (edits != null && !edits.isEmpty()) {
                long replaySeqId = (entry.getKey().hasOrigSequenceNumber()) ? entry.getKey().getOrigSequenceNumber() : entry.getKey().getLogSequenceNumber();
                OperationStatus[] result = doReplayBatchOp(region, edits, replaySeqId);
                // check if it's a partial success
                for (int i = 0; result != null && i < result.length; i++) {
                    if (result[i] != OperationStatus.SUCCESS) {
                        throw new IOException(result[i].getExceptionMsg());
        //sync wal at the end because ASYNC_WAL is used above
        WAL wal = getWAL(region);
        if (wal != null) {
        if (coprocessorHost != null) {
            for (Pair<WALKey, WALEdit> entry : walEntries) {
                coprocessorHost.postWALRestore(region.getRegionInfo(), entry.getFirst(), entry.getSecond());
        return ReplicateWALEntryResponse.newBuilder().build();
    } catch (IOException ie) {
        throw new ServiceException(ie);
    } finally {
        if (regionServer.metricsRegionServer != null) {
            regionServer.metricsRegionServer.updateReplay(EnvironmentEdgeManager.currentTime() - before);
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) ByteString(org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) CellScanner(org.apache.hadoop.hbase.CellScanner) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Pair(org.apache.hadoop.hbase.util.Pair) NameInt64Pair(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameInt64Pair) NameBytesPair(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameBytesPair) NotServingRegionException(org.apache.hadoop.hbase.NotServingRegionException) Durability(org.apache.hadoop.hbase.client.Durability) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) HBaseRpcController(org.apache.hadoop.hbase.ipc.HBaseRpcController) ServiceException(org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException) WALEntry(org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry) QosPriority(org.apache.hadoop.hbase.ipc.QosPriority)

Example 17 with WALKey

use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.

the class HRegion method appendCurrentNonces.

private void appendCurrentNonces(final Mutation mutation, final boolean replay, final WALEdit walEdit, final long now, final long currentNonceGroup, final long currentNonce) throws IOException {
    if (walEdit.isEmpty())
    if (!replay)
        throw new IOException("Multiple nonces per batch and not in replay");
    WALKey walKey = new WALKey(this.getRegionInfo().getEncodedNameAsBytes(), this.htableDescriptor.getTableName(), now, mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, this.getReplicationScope());
    this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
    // Complete the mvcc transaction started down in append else it will block others
Also used : WALKey(org.apache.hadoop.hbase.wal.WALKey) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException)

Example 18 with WALKey

use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.

the class HRegion method replayRecoveredEdits.

  /**
   * @param edits File of recovered edits.
   * @param maxSeqIdInStores Maximum sequenceid found in each store.  Edits in wal
   * must be larger than this to be replayed for each store.
   * @param reporter progress callback; replay is stopped with an IOException when its
   * <code>progress()</code> call returns false
   * @return the sequence id of the last edit added to this region out of the
   * recovered edits log or <code>minSeqId</code> if nothing added from editlogs.
   * @throws IOException
   */
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter) throws IOException {
    // Reads entries from the recovered-edits file and restores their cells into this region's stores.
    // NOTE(review): this listing appears truncated -- closing braces and several statements are
    // not present -- so the comments below describe only what is visible.
    String msg = "Replaying edits from " + edits;;
    MonitoredTask status = TaskMonitor.get().createStatus(msg);
    FileSystem fs = this.fs.getFileSystem();
    status.setStatus("Opening recovered edits");
    WAL.Reader reader = null;
    try {
        reader = WALFactory.createReader(fs, edits, conf);
        // Highest log sequence id accepted so far; this is the value the method returns.
        long currentEditSeqId = -1;
        // Sequence id stamped onto each restored cell.
        long currentReplaySeqId = -1;
        long firstSeqIdInLog = -1;
        // NOTE(review): skippedEdits and editsCount are reported below but never incremented in the
        // visible code.
        long skippedEdits = 0;
        long editsCount = 0;
        // Sum of val.size() since the last elapsed-time check.
        long intervalEdits = 0;
        WAL.Entry entry;
        // Store matched for the previous cell; reused while consecutive cells share a family.
        HStore store = null;
        boolean reported_once = false;
        ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
        try {
            // How many edits seen before we check elapsed time
            // NOTE(review): the configuration key is an empty string here -- confirm the intended key.
            int interval = this.conf.getInt("", 2000);
            // How often to send a progress report (default 1/2 master timeout)
            // NOTE(review): the configuration key is an empty string here as well.
            int period = this.conf.getInt("", 300000);
            long lastReport = EnvironmentEdgeManager.currentTime();
            if (coprocessorHost != null) {
                coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
            // NOTE(review): the expression assigned to entry is missing from this loop condition.
            while ((entry = != null) {
                WALKey key = entry.getKey();
                WALEdit val = entry.getEdit();
                if (ng != null) {
                    // some test, or nonces disabled
                    ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
                if (reporter != null) {
                    intervalEdits += val.size();
                    if (intervalEdits >= interval) {
                        // Number of edits interval reached
                        intervalEdits = 0;
                        long cur = EnvironmentEdgeManager.currentTime();
                        if (lastReport + period <= cur) {
                            status.setStatus("Replaying edits..." + " skipped=" + skippedEdits + " edits=" + editsCount);
                            // Timeout reached
                            if (!reporter.progress()) {
                                msg = "Progressable reporter failed, stopping replay";
                                throw new IOException(msg);
                            reported_once = true;
                            lastReport = cur;
                // Remember the sequence id of the first entry read.
                if (firstSeqIdInLog == -1) {
                    firstSeqIdInLog = key.getLogSeqNum();
                if (currentEditSeqId > key.getLogSeqNum()) {
                    // when this condition is true, it means we have a serious defect because we need to
                    // maintain increasing SeqId for WAL edits per region
                    LOG.error(getRegionInfo().getEncodedName() + " : " + "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key + "; edit=" + val);
                } else {
                    currentEditSeqId = key.getLogSeqNum();
                // Use the original sequence number when the key carries a positive one.
                currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ? key.getOrigLogSeqNum() : currentEditSeqId;
                // The pre-restore hook below is invoked once per WAL entry
                // instead of a KeyValue.
                if (coprocessorHost != null) {
                    status.setStatus("Running pre-WAL-restore hook in coprocessors");
                    if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
                        // if bypass this wal entry, ignore it ...
                // Entries written under a different encoded region name get a per-cell row-range check.
                boolean checkRowWithinBoundary = false;
                // Check this edit is for this region.
                if (!Bytes.equals(key.getEncodedRegionName(), this.getRegionInfo().getEncodedNameAsBytes())) {
                    checkRowWithinBoundary = true;
                boolean flush = false;
                MemstoreSize memstoreSize = new MemstoreSize();
                for (Cell cell : val.getCells()) {
                    // METACOLUMN info such as HBASE::CACHEFLUSH entries
                    if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
                        // if region names don't match, skipp replaying compaction marker
                        if (!checkRowWithinBoundary) {
                            //this is a special edit, we should handle it
                            CompactionDescriptor compaction = WALEdit.getCompaction(cell);
                            if (compaction != null) {
                                //replay the compaction
                                replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
                    // Figure which store the edit is meant for.
                    if (store == null || !CellUtil.matchingFamily(cell, store.getFamily().getName())) {
                        store = getHStore(cell);
                    if (store == null) {
                        // This should never happen.  Perhaps schema was changed between
                        // crash and redeploy?
                        LOG.warn("No family for " + cell);
                    if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
                        LOG.warn("Row of " + cell + " is not within region boundary");
                    // Now, figure if we should skip this edit.
                    // NOTE(review): the body of this skip check is not visible in this listing.
                    if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily().getName())) {
                    CellUtil.setSequenceId(cell, currentReplaySeqId);
                    restoreEdit(store, cell, memstoreSize);
                if (this.rsAccounting != null) {
                    rsAccounting.addRegionReplayEditsSize(getRegionInfo().getRegionName(), memstoreSize);
                // Add this entry's size to the region total and flush when isFlushSize() says so.
                flush = isFlushSize(this.addAndGetMemstoreSize(memstoreSize));
                if (flush) {
                    internalFlushcache(null, currentEditSeqId, stores.values(), status, false);
                if (coprocessorHost != null) {
                    coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
            if (coprocessorHost != null) {
                coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
        } catch (EOFException eof) {
            // An EOF is not rethrown here: the file is moved aside and a warning is logged.
            Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
            // NOTE(review): "EnLongAddered" looks like a damaged spelling of "Encountered" -- confirm
            // before changing the message text.
            msg = "EnLongAddered EOF. Most likely due to Master failure during " + "wal splitting, so we have this data in another edit.  " + "Continuing, but renaming " + edits + " as " + p;
            LOG.warn(msg, eof);
        } catch (IOException ioe) {
            // A ParseException cause is treated as file corruption:
            // then this problem is idempotent and retrying won't help
            if (ioe.getCause() instanceof ParseException) {
                Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
                // NOTE(review): same suspected "Encountered" typo as in the EOF message above.
                msg = "File corruption enLongAddered!  " + "Continuing, but renaming " + edits + " as " + p;
                LOG.warn(msg, ioe);
            } else {
                // checksum exception on one datanode, etc).  throw & retry
                throw ioe;
        // NOTE(review): the body of this check is not visible in this listing.
        if (reporter != null && !reported_once) {
        msg = "Applied " + editsCount + ", skipped " + skippedEdits + ", firstSequenceIdInLog=" + firstSeqIdInLog + ", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
        return currentEditSeqId;
    } finally {
        // NOTE(review): the body of this null check (presumably closing the reader) is not visible.
        if (reader != null) {
Also used : Path(org.apache.hadoop.fs.Path) WAL(org.apache.hadoop.hbase.wal.WAL) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) FileSystem(org.apache.hadoop.fs.FileSystem) EOFException(java.io.EOFException) ParseException(java.text.ParseException) CompactionDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor) Cell(org.apache.hadoop.hbase.Cell) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask)

Example 19 with WALKey

use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.

the class TestLogRollAbort method testLogRollAfterSplitStart.

  /**
   * Tests the case where a RegionServer enters a GC pause,
   * comes back online after the master declared it dead and started to split.
   * Want log rolling after a master split to fail. See HBASE-2312.
   */
@Test(timeout = 300000)
public void testLogRollAfterSplitStart() throws IOException {"Verify wal roll after split starts will fail.");
    String logName = ServerName.valueOf("testLogRollAfterSplitStart", 16010, System.currentTimeMillis()).toString();
    Path thisTestsDir = new Path(HBASELOGDIR, AbstractFSWALProvider.getWALDirectoryName(logName));
    final WALFactory wals = new WALFactory(conf, null, logName);
    try {
        // put some entries in an WAL
        TableName tableName = TableName.valueOf(this.getClass().getName());
        HRegionInfo regioninfo = new HRegionInfo(tableName, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
        final WAL log = wals.getWAL(regioninfo.getEncodedNameAsBytes(), regioninfo.getTable().getNamespace());
        MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1);
        final int total = 20;
        for (int i = 0; i < total; i++) {
            WALEdit kvs = new WALEdit();
            kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
            HTableDescriptor htd = new HTableDescriptor(tableName);
            htd.addFamily(new HColumnDescriptor("column"));
            NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
            for (byte[] fam : htd.getFamiliesKeys()) {
                scopes.put(fam, 0);
            log.append(regioninfo, new WALKey(regioninfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis(), mvcc, scopes), kvs, true);
        // Send the data to HDFS datanodes and close the HDFS writer
        ((AbstractFSWAL<?>) log).replaceWriter(((FSHLog) log).getOldPath(), null, null);
        /* code taken from MasterFileSystem.getLogDirs(), which is called from MasterFileSystem.splitLog()
       * handles RS shutdowns (as observed by the splitting process)
        // rename the directory so a rogue RS doesn't create more WALs
        Path rsSplitDir = thisTestsDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
        if (!fs.rename(thisTestsDir, rsSplitDir)) {
            throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
        LOG.debug("Renamed region directory: " + rsSplitDir);
        LOG.debug("Processing the old log files.");
        WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);
        LOG.debug("Trying to roll the WAL.");
        try {
  "rollWriter() did not throw any exception.");
        } catch (IOException ioe) {
            if (ioe.getCause() instanceof FileNotFoundException) {
      "Got the expected exception: ", ioe.getCause());
            } else {
      "Unexpected exception: " + ioe);
    } finally {
        if (fs.exists(thisTestsDir)) {
            fs.delete(thisTestsDir, true);
Also used : Path(org.apache.hadoop.fs.Path) WAL(org.apache.hadoop.hbase.wal.WAL) KeyValue(org.apache.hadoop.hbase.KeyValue) MultiVersionConcurrencyControl(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) TreeMap(java.util.TreeMap) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) WALKey(org.apache.hadoop.hbase.wal.WALKey) TableName(org.apache.hadoop.hbase.TableName) WALFactory(org.apache.hadoop.hbase.wal.WALFactory) Test(org.junit.Test)

Example 20 with WALKey

use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.

the class AbstractTestWALReplay method testReplayEditsWrittenIntoWAL.

  /**
   * Create an HRegion with the result of a WAL split and test we only see the
   * good edits
   * @throws Exception
   */
public void testReplayEditsWrittenIntoWAL() throws Exception {
    // Writes countPerFamily edits per family, one edit to a family named "another family" and a
    // DeleteFamily for "c" into a WAL, then opens a new HRegion as a different user and checks
    // how many cells a Get on the row returns.
    // NOTE(review): this listing appears truncated -- closing braces and some statements are not
    // present -- so the comments below describe only what is visible.
    final TableName tableName = TableName.valueOf("testReplayEditsWrittenIntoWAL");
    final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableName);
    final Path basedir = FSUtils.getTableDir(hbaseRootDir, tableName);
    final HTableDescriptor htd = createBasic3FamilyHTD(tableName);
    HRegion region2 = HBaseTestingUtility.createRegionAndWAL(hri, hbaseRootDir, this.conf, htd);
    final WAL wal = createWAL(this.conf, hbaseRootDir, logName);
    // The table name bytes double as the row key for every edit in this test.
    final byte[] rowName = tableName.getName();
    final byte[] regionName = hri.getEncodedNameAsBytes();
    // Add 1k to each family.
    final int countPerFamily = 1000;
    // NOTE(review): familyNames is passed to startCacheFlush below but nothing visible adds to it.
    Set<byte[]> familyNames = new HashSet<>();
    NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    for (byte[] fam : htd.getFamiliesKeys()) {
        scopes.put(fam, 0);
    // NOTE(review): ee is not declared in this method; presumably a field of the test class.
    for (HColumnDescriptor hcd : htd.getFamilies()) {
        addWALEdits(tableName, hri, rowName, hcd.getName(), countPerFamily, ee, wal, htd, mvcc, scopes);
    // Add a cache flush, shouldn't have any effect
    wal.startCacheFlush(regionName, familyNames);
    // Add an edit to another family, should be skipped.
    WALEdit edit = new WALEdit();
    long now = ee.currentTime();
    edit.add(new KeyValue(rowName, Bytes.toBytes("another family"), rowName, now, rowName));
    wal.append(hri, new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes), edit, true);
    // Delete the c family to verify deletes make it over.
    edit = new WALEdit();
    now = ee.currentTime();
    edit.add(new KeyValue(rowName, Bytes.toBytes("c"), null, now, KeyValue.Type.DeleteFamily));
    wal.append(hri, new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes), edit, true);
    // Sync.
    // NOTE(review): no sync statement follows the comment above in this listing.
    // Make a new conf and a new fs for the splitter to run on so we can take
    // over old wal.
    final Configuration newConf = HBaseConfiguration.create(this.conf);
    User user = HBaseTestingUtility.getDifferentUser(newConf, ".replay.wal.secondtime");
    user.runAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            FileSystem newFS = FileSystem.get(newConf);
            // 100k seems to make for about 4 flushes during HRegion#initialize.
            newConf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 100);
            // Make a new wal for new region.
            WAL newWal = createWAL(newConf, hbaseRootDir, logName);
            // NOTE(review): flushcount is asserted to be > 0 below but is never incremented in the
            // visible code.
            final AtomicInteger flushcount = new AtomicInteger(0);
            try {
                // Anonymous HRegion subclass that wraps internalFlushcache, passing a mocked
                // MonitoredTask to the superclass call.
                final HRegion region = new HRegion(basedir, newWal, newFS, newConf, hri, htd, null) {

                    protected FlushResult internalFlushcache(final WAL wal, final long myseqid, final Collection<Store> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker) throws IOException {
                        // NOTE(review): the next line is a bare string literal; the call that used it
                        // (presumably a log statement) is missing from this listing.
              "InternalFlushCache Invoked");
                        FlushResult fs = super.internalFlushcache(wal, myseqid, storesToFlush, Mockito.mock(MonitoredTask.class), writeFlushWalMarker);
                        return fs;
                // The seq id this region has opened up with
                long seqid = region.initialize();
                // The mvcc readpoint of from inserting data.
                long writePoint = mvcc.getWritePoint();
                // We flushed during init.
                assertTrue("Flushcount=" + flushcount.get(), flushcount.get() > 0);
                // The region must open exactly one sequence id past the last write point.
                assertTrue((seqid - 1) == writePoint);
                Get get = new Get(rowName);
                Result result = region.get(get);
                // Make sure we only see the good edits
                // One family is left out of the expected count; presumably the deleted "c" family.
                assertEquals(countPerFamily * (htd.getFamilies().size() - 1), result.size());
            } finally {
            // NOTE(review): the body of the finally block above is not visible in this listing.
            return null;
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) User(org.apache.hadoop.hbase.security.User) Configuration(org.apache.hadoop.conf.Configuration) MultiVersionConcurrencyControl(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl) Store(org.apache.hadoop.hbase.regionserver.Store) CompactingMemStore(org.apache.hadoop.hbase.regionserver.CompactingMemStore) HStore(org.apache.hadoop.hbase.regionserver.HStore) Result(org.apache.hadoop.hbase.client.Result) WALKey(org.apache.hadoop.hbase.wal.WALKey) FileSystem(org.apache.hadoop.fs.FileSystem) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) TreeMap(java.util.TreeMap) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Get(org.apache.hadoop.hbase.client.Get) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask) Test(org.junit.Test)


WALKey (org.apache.hadoop.hbase.wal.WALKey)51 WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit)29 Test (org.junit.Test)26 WAL (org.apache.hadoop.hbase.wal.WAL)22 TreeMap (java.util.TreeMap)17 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)17 KeyValue (org.apache.hadoop.hbase.KeyValue)16 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)15 IOException ( Path (org.apache.hadoop.fs.Path)14 TableName (org.apache.hadoop.hbase.TableName)12 ArrayList (java.util.ArrayList)10 Cell (org.apache.hadoop.hbase.Cell)10 HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor)10 FileSystem (org.apache.hadoop.fs.FileSystem)9 Get (org.apache.hadoop.hbase.client.Get)9 Result (org.apache.hadoop.hbase.client.Result)9 MultiVersionConcurrencyControl (org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl)8 WALFactory (org.apache.hadoop.hbase.wal.WALFactory)8 Put (org.apache.hadoop.hbase.client.Put)7