Example 11 with WALEdit

use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project hbase by apache.

the class HRegion method doPreBatchMutateHook.

/**
 * Runs the coprocessor prePut / preDelete hooks for each mutation in the batch.
 * <p>
 * When a hook returns true the operation is marked SUCCESS in {@code retCodeDetails};
 * mutations that are neither Put nor Delete are marked FAILURE. Any WAL edit content a
 * hook produced is stored in {@code walEditsFromCoprocessors} at the mutation's index.
 * <p>
 * NOTE(review): this excerpt has lost its closing braces (and possibly whole lines) in
 * extraction, so the nesting must be inferred from indentation; confirm against the
 * upstream source before relying on it.
 *
 * @param batchOp the batch whose mutations are inspected; its status and WAL-edit arrays
 *        are updated in place
 * @throws IOException declared by the signature; presumably propagated from the hooks
 */
private void doPreBatchMutateHook(BatchOperation<?> batchOp) throws IOException {
    /* Run coprocessor pre hook outside of locks to avoid deadlock */
    // A single WALEdit is handed to the hooks and replaced once a hook has filled it.
    WALEdit walEdit = new WALEdit();
    if (coprocessorHost != null) {
        for (int i = 0; i < batchOp.operations.length; i++) {
            Mutation m = batchOp.getMutation(i);
            if (m instanceof Put) {
                if (coprocessorHost.prePut((Put) m, walEdit, m.getDurability())) {
                    // pre hook says skip this Put
                    // mark as success and skip in doMiniBatchMutation
                    batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
            } else if (m instanceof Delete) {
                Delete curDel = (Delete) m;
                if (curDel.getFamilyCellMap().isEmpty()) {
                    // handle deleting a row case
                    // NOTE(review): the body of this branch is missing from the excerpt.
                if (coprocessorHost.preDelete(curDel, walEdit, m.getDurability())) {
                    // pre hook says skip this Delete
                    // mark as success and skip in doMiniBatchMutation
                    batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
            } else {
                // In case of passing Append mutations along with the Puts and Deletes in batchMutate
                // mark the operation return code as failure so that it will not be considered in
                // the doMiniBatchMutation
                batchOp.retCodeDetails[i] = new OperationStatus(OperationStatusCode.FAILURE, "Put/Delete mutations only supported in batchMutate() now");
            // Keep whatever the hook wrote for this mutation and start a fresh edit for the next one.
            if (!walEdit.isEmpty()) {
                batchOp.walEditsFromCoprocessors[i] = walEdit;
                walEdit = new WALEdit();
Also used : Delete(org.apache.hadoop.hbase.client.Delete) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Mutation(org.apache.hadoop.hbase.client.Mutation) Put(org.apache.hadoop.hbase.client.Put)

Example 12 with WALEdit

use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project hbase by apache.

the class HRegion method replayRecoveredEdits.

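  /**
   * @param edits File of recovered edits.
   * @param maxSeqIdInStores Maximum sequence id found in each store. Edits in the WAL
   * must be larger than this to be replayed for each store.
   * @param reporter Progress callback polled periodically during replay (may be null);
   * replay is aborted with an IOException when it reports failure.
   * @return the sequence id of the last edit added to this region out of the
   * recovered edits log or <code>minSeqId</code> if nothing added from editlogs.
   * @throws IOException if the reporter signals failure or a read error that is not
   * treated as file corruption occurs.
   */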
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter) throws IOException {
    String msg = "Replaying edits from " + edits;;
    MonitoredTask status = TaskMonitor.get().createStatus(msg);
    FileSystem fs = this.fs.getFileSystem();
    status.setStatus("Opening recovered edits");
    WAL.Reader reader = null;
    try {
        reader = WALFactory.createReader(fs, edits, conf);
        long currentEditSeqId = -1;
        long currentReplaySeqId = -1;
        long firstSeqIdInLog = -1;
        long skippedEdits = 0;
        long editsCount = 0;
        long intervalEdits = 0;
        WAL.Entry entry;
        HStore store = null;
        boolean reported_once = false;
        ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
        try {
            // How many edits seen before we check elapsed time
            int interval = this.conf.getInt("", 2000);
            // How often to send a progress report (default 1/2 master timeout)
            int period = this.conf.getInt("", 300000);
            long lastReport = EnvironmentEdgeManager.currentTime();
            if (coprocessorHost != null) {
                coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
            while ((entry = != null) {
                WALKey key = entry.getKey();
                WALEdit val = entry.getEdit();
                if (ng != null) {
                    // some test, or nonces disabled
                    ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
                if (reporter != null) {
                    intervalEdits += val.size();
                    if (intervalEdits >= interval) {
                        // Number of edits interval reached
                        intervalEdits = 0;
                        long cur = EnvironmentEdgeManager.currentTime();
                        if (lastReport + period <= cur) {
                            status.setStatus("Replaying edits..." + " skipped=" + skippedEdits + " edits=" + editsCount);
                            // Timeout reached
                            if (!reporter.progress()) {
                                msg = "Progressable reporter failed, stopping replay";
                                throw new IOException(msg);
                            reported_once = true;
                            lastReport = cur;
                if (firstSeqIdInLog == -1) {
                    firstSeqIdInLog = key.getLogSeqNum();
                if (currentEditSeqId > key.getLogSeqNum()) {
                    // when this condition is true, it means we have a serious defect because we need to
                    // maintain increasing SeqId for WAL edits per region
                    LOG.error(getRegionInfo().getEncodedName() + " : " + "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key + "; edit=" + val);
                } else {
                    currentEditSeqId = key.getLogSeqNum();
                currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ? key.getOrigLogSeqNum() : currentEditSeqId;
                // instead of a KeyValue.
                if (coprocessorHost != null) {
                    status.setStatus("Running pre-WAL-restore hook in coprocessors");
                    if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
                        // if bypass this wal entry, ignore it ...
                boolean checkRowWithinBoundary = false;
                // Check this edit is for this region.
                if (!Bytes.equals(key.getEncodedRegionName(), this.getRegionInfo().getEncodedNameAsBytes())) {
                    checkRowWithinBoundary = true;
                boolean flush = false;
                MemstoreSize memstoreSize = new MemstoreSize();
                for (Cell cell : val.getCells()) {
                    // METACOLUMN info such as HBASE::CACHEFLUSH entries
                    if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
                        // if region names don't match, skipp replaying compaction marker
                        if (!checkRowWithinBoundary) {
                            //this is a special edit, we should handle it
                            CompactionDescriptor compaction = WALEdit.getCompaction(cell);
                            if (compaction != null) {
                                //replay the compaction
                                replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
                    // Figure which store the edit is meant for.
                    if (store == null || !CellUtil.matchingFamily(cell, store.getFamily().getName())) {
                        store = getHStore(cell);
                    if (store == null) {
                        // This should never happen.  Perhaps schema was changed between
                        // crash and redeploy?
                        LOG.warn("No family for " + cell);
                    if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
                        LOG.warn("Row of " + cell + " is not within region boundary");
                    // Now, figure if we should skip this edit.
                    if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily().getName())) {
                    CellUtil.setSequenceId(cell, currentReplaySeqId);
                    restoreEdit(store, cell, memstoreSize);
                if (this.rsAccounting != null) {
                    rsAccounting.addRegionReplayEditsSize(getRegionInfo().getRegionName(), memstoreSize);
                flush = isFlushSize(this.addAndGetMemstoreSize(memstoreSize));
                if (flush) {
                    internalFlushcache(null, currentEditSeqId, stores.values(), status, false);
                if (coprocessorHost != null) {
                    coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
            if (coprocessorHost != null) {
                coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
        } catch (EOFException eof) {
            Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
            msg = "EnLongAddered EOF. Most likely due to Master failure during " + "wal splitting, so we have this data in another edit.  " + "Continuing, but renaming " + edits + " as " + p;
            LOG.warn(msg, eof);
        } catch (IOException ioe) {
            // then this problem is idempotent and retrying won't help
            if (ioe.getCause() instanceof ParseException) {
                Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
                msg = "File corruption enLongAddered!  " + "Continuing, but renaming " + edits + " as " + p;
                LOG.warn(msg, ioe);
            } else {
                // checksum exception on one datanode, etc).  throw & retry
                throw ioe;
        if (reporter != null && !reported_once) {
        msg = "Applied " + editsCount + ", skipped " + skippedEdits + ", firstSequenceIdInLog=" + firstSeqIdInLog + ", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
        return currentEditSeqId;
    } finally {
        if (reader != null) {
Also used : Path(org.apache.hadoop.fs.Path) WAL(org.apache.hadoop.hbase.wal.WAL) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) FileSystem(org.apache.hadoop.fs.FileSystem) EOFException(java.io.EOFException) ParseException(java.text.ParseException) CompactionDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor) Cell(org.apache.hadoop.hbase.Cell) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask)

Example 13 with WALEdit

use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.

the class SystemCatalogWALEntryFilterIT method testOtherTablesAutoPass.

/**
 * Checks that a WAL entry for a table other than System.Catalog is passed through by
 * {@code SystemCatalogWALEntryFilter}.
 * <p>
 * NOTE(review): the WALEdit is constructed empty and nothing visible here adds a cell, yet
 * the assertion expects size 1. The excerpt is probably missing the line that adds a cell
 * to the entry (and the closing brace); confirm against the upstream test.
 *
 * @throws Exception declared by the signature
 */
public void testOtherTablesAutoPass() throws Exception {
    // Cell is nonsense but we should auto pass because the table name's not System.Catalog
    WAL.Entry entry = new WAL.Entry(new WALKey(REGION, TableName.valueOf(TestUtil.ENTITY_HISTORY_TABLE_NAME)), new WALEdit());
    SystemCatalogWALEntryFilter filter = new SystemCatalogWALEntryFilter();
    // The filtered entry is expected to still carry exactly one cell.
    Assert.assertEquals(1, filter.filter(entry).getEdit().size());
Also used : WALKey(org.apache.hadoop.hbase.wal.WALKey) WAL(org.apache.hadoop.hbase.wal.WAL) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Test(org.junit.Test)

Example 14 with WALEdit

use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.

the class SystemCatalogWALEntryFilterIT method testSystemCatalogWALEntryFilter.

/**
 * Exercises {@code SystemCatalogWALEntryFilter} against WAL entries built from
 * System.Catalog rows: tenant view rows and tenant parent-child links must pass the
 * filter, while the non-tenant parent-child link must be filtered out.
 * <p>
 * NOTE(review): this excerpt lost lines during extraction. {@code walKey} is not defined
 * in the visible code, nothing visibly adds cells to {@code comboEdit}, the step that adds
 * the parent-child link to the tenant view entry is absent, and the closing brace is
 * missing. Confirm against the upstream test.
 *
 * @throws Exception declared by the signature
 */
public void testSystemCatalogWALEntryFilter() throws Exception {
    // now create WAL.Entry objects that refer to cells in those view rows in System.Catalog
    Get tenantViewGet = getTenantViewGet(catalogTable, TENANT_BYTES, TENANT_VIEW_NAME);
    Get nonTenantViewGet = getTenantViewGet(catalogTable, DEFAULT_TENANT_BYTES, NONTENANT_VIEW_NAME);
    Get tenantLinkGet = getParentChildLinkGet(catalogTable, TENANT_BYTES, TENANT_VIEW_NAME);
    Get nonTenantLinkGet = getParentChildLinkGet(catalogTable, DEFAULT_TENANT_BYTES, NONTENANT_VIEW_NAME);
    WAL.Entry nonTenantViewEntry = getEntry(systemCatalogTableName, nonTenantViewGet);
    WAL.Entry tenantViewEntry = getEntry(systemCatalogTableName, tenantViewGet);
    WAL.Entry nonTenantLinkEntry = getEntry(systemCatalogTableName, nonTenantLinkGet);
    WAL.Entry tenantLinkEntry = getEntry(systemCatalogTableName, tenantLinkGet);
    // verify that the tenant view WAL.Entry passes the filter and the non-tenant view does not
    SystemCatalogWALEntryFilter filter = new SystemCatalogWALEntryFilter();
    WAL.Entry filteredTenantEntry = filter.filter(tenantViewEntry);
    Assert.assertNotNull("Tenant view was filtered when it shouldn't be!", filteredTenantEntry);
    Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(tenantViewEntry).getEdit().size());
    // now check that a WAL.Entry with cells from both a tenant and a non-tenant
    // catalog row only allow the tenant cells through
    WALEdit comboEdit = new WALEdit();
    WAL.Entry comboEntry = new WAL.Entry(walKey, comboEdit);
    Assert.assertEquals(tenantViewEntry.getEdit().size() + nonTenantViewEntry.getEdit().size(), comboEntry.getEdit().size());
    Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(comboEntry).getEdit().size());
    // now check that the parent-child links (which have the tenant_id of the view's parent,
    // but are a part of the view's metadata) are migrated in the tenant case
    // but not the non-tenant. The view's tenant_id is in th System.Catalog.COLUMN_NAME field
    Assert.assertNull("Non-tenant parent-child link was not filtered " + "when it should be!", filter.filter(nonTenantLinkEntry));
    Assert.assertNotNull("Tenant parent-child link was filtered when it should not be!", filter.filter(tenantLinkEntry));
    Assert.assertEquals(tenantLinkEntry.getEdit().size(), filter.filter(tenantLinkEntry).getEdit().size());
    // add the parent-child link to the tenant view WAL entry,
    // since they'll usually be together and they both need to
    // be replicated
    // Compare against the filtered entry: asserting a value equal to itself can never fail.
    Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(tenantViewEntry).getEdit().size());
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Test(org.junit.Test)

Example 15 with WALEdit

use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.

the class Indexer method preBatchMutateWithExceptions.

/**
 * Pre-batch-mutate hook that prepares index updates for the mutations in a mini batch.
 * <p>
 * Steps visible in this excerpt: determine the effective durability, lock the rows of
 * index-enabled mutations, group mutations by row, optionally reset cell timestamps to the
 * current time, build the index updates through {@code builder.getIndexUpdate}, hand
 * updates that target this region's own table back to the batch, and append the index
 * updates to the batch's first WALEdit unless the durability is SKIP_WAL.
 * <p>
 * NOTE(review): this excerpt lost its closing braces and several statements during
 * extraction (for example the early-exit body, the code that stores mutations into the
 * row map, and the body that fills {@code localUpdates}); nesting must be inferred from
 * indentation. Confirm against the upstream source before relying on it.
 *
 * @param c coprocessor context giving access to the region environment
 * @param miniBatchOp the batch of mutations being processed; statuses and WAL edit are
 *        updated in place
 * @throws Throwable declared by the signature
 */
public void preBatchMutateWithExceptions(ObserverContext<RegionCoprocessorEnvironment> c, MiniBatchOperationInProgress<Mutation> miniBatchOp) throws Throwable {
    // first group all the updates for a single row into a single update to be processed
    Map<ImmutableBytesPtr, MultiMutation> mutationsMap = new HashMap<ImmutableBytesPtr, MultiMutation>();
    Durability defaultDurability = Durability.SYNC_WAL;
    if (c.getEnvironment().getRegion() != null) {
        defaultDurability = c.getEnvironment().getRegion().getTableDesc().getDurability();
        defaultDurability = (defaultDurability == Durability.USE_DEFAULT) ? Durability.SYNC_WAL : defaultDurability;
    /*
     * Exclusively lock all rows so we get a consistent read
     * while determining the index updates
     */
    BatchMutateContext context = new BatchMutateContext();
    setBatchMutateContext(c, context);
    // Start from the weakest durability and raise it to the strongest one any mutation asks for.
    Durability durability = Durability.SKIP_WAL;
    boolean copyMutations = false;
    for (int i = 0; i < miniBatchOp.size(); i++) {
        Mutation m = miniBatchOp.getOperation(i);
        if (this.builder.isAtomicOp(m)) {
            miniBatchOp.setOperationStatus(i, IGNORE);
        if (this.builder.isEnabled(m)) {
            context.rowLocks.add(lockManager.lockRow(m.getRow(), rowLockWaitDuration));
            Durability effectiveDurablity = (m.getDurability() == Durability.USE_DEFAULT) ? defaultDurability : m.getDurability();
            // NOTE(review): relies on the declaration order of the Durability enum constants.
            if (effectiveDurablity.ordinal() > durability.ordinal()) {
                durability = effectiveDurablity;
            // Track whether or not we need to
            ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
            // A row seen twice means several mutations must be merged per row later on.
            if (mutationsMap.containsKey(row)) {
                copyMutations = true;
            } else {
                mutationsMap.put(row, null);
    // early exit if it turns out we don't have any edits
    if (mutationsMap.isEmpty()) {
    // If we're copying the mutations
    Collection<Mutation> originalMutations;
    Collection<? extends Mutation> mutations;
    if (copyMutations) {
        originalMutations = null;
        mutations = mutationsMap.values();
    } else {
        originalMutations = Lists.newArrayListWithExpectedSize(mutationsMap.size());
        mutations = originalMutations;
    Mutation firstMutation = miniBatchOp.getOperation(0);
    ReplayWrite replayWrite = this.builder.getReplayWrite(firstMutation);
    // Timestamps are only rewritten when this is not a replayed write.
    boolean resetTimeStamp = replayWrite == null;
    long now = EnvironmentEdgeManager.currentTimeMillis();
    byte[] byteNow = Bytes.toBytes(now);
    for (int i = 0; i < miniBatchOp.size(); i++) {
        Mutation m = miniBatchOp.getOperation(i);
        // way optimization go though.
        if (miniBatchOp.getOperationStatus(i) != IGNORE && this.builder.isEnabled(m)) {
            if (resetTimeStamp) {
                // inconsistencies as this case isn't handled correctly currently).
                for (List<Cell> family : m.getFamilyCellMap().values()) {
                    List<KeyValue> familyKVs = KeyValueUtil.ensureKeyValues(family);
                    for (KeyValue kv : familyKVs) {
                        setTimeStamp(kv, byteNow);
            // the index as they're already written and just being replayed.
            if (replayWrite == ReplayWrite.INDEX_ONLY) {
                miniBatchOp.setOperationStatus(i, NOWRITE);
            // Put and a Delete mutation for the same row).
            if (copyMutations) {
                // Add the mutation to the batch set
                ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
                MultiMutation stored = mutationsMap.get(row);
                // we haven't seen this row before, so add it
                if (stored == null) {
                    stored = new MultiMutation(row);
                    mutationsMap.put(row, stored);
            } else {
    // dump all the index updates into a single WAL. They will get combined in the end anyways, so
    // don't worry which one we get
    WALEdit edit = miniBatchOp.getWalEdit(0);
    if (edit == null) {
        edit = new WALEdit();
        miniBatchOp.setWalEdit(0, edit);
    if (copyMutations || replayWrite != null) {
        mutations = IndexManagementUtil.flattenMutationsByTimestamp(mutations);
    // get the current span, or just use a null-span to avoid a bunch of if statements
    try (TraceScope scope = Trace.startSpan("Starting to build index updates")) {
        Span current = scope.getSpan();
        if (current == null) {
            current = NullSpan.INSTANCE;
        long start = EnvironmentEdgeManager.currentTimeMillis();
        // get the index updates for all elements in this batch
        Collection<Pair<Mutation, byte[]>> indexUpdates = this.builder.getIndexUpdate(miniBatchOp, mutations);
        long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
        // Building the updates took longer than the configured threshold: log it at debug level.
        if (duration >= slowIndexPrepareThreshold) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(getCallTooSlowMessage("indexPrepare", duration, slowIndexPrepareThreshold));
        current.addTimelineAnnotation("Built index updates, doing preStep");
        TracingUtils.addAnnotation(current, "index update count", indexUpdates.size());
        byte[] tableName = c.getEnvironment().getRegion().getTableDesc().getTableName().getName();
        Iterator<Pair<Mutation, byte[]>> indexUpdatesItr = indexUpdates.iterator();
        List<Mutation> localUpdates = new ArrayList<Mutation>(indexUpdates.size());
        while (indexUpdatesItr.hasNext()) {
            Pair<Mutation, byte[]> next = indexUpdatesItr.next();
            // The pair's second element is compared with this region's table name.
            // NOTE(review): the body of this branch is missing from the excerpt.
            if (Bytes.compareTo(next.getSecond(), tableName) == 0) {
        if (!localUpdates.isEmpty()) {
            miniBatchOp.addOperationsFromCP(0, localUpdates.toArray(new Mutation[localUpdates.size()]));
        if (!indexUpdates.isEmpty()) {
            context.indexUpdates = indexUpdates;
            // write index updates to WAL
            if (durability != Durability.SKIP_WAL) {
                // we have all the WAL durability, so we just update the WAL entry and move on
                for (Pair<Mutation, byte[]> entry : indexUpdates) {
                    edit.add(new IndexedKeyValue(entry.getSecond(), entry.getFirst()));
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) IndexedKeyValue(org.apache.phoenix.hbase.index.wal.IndexedKeyValue) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Span(org.apache.htrace.Span) NullSpan(org.apache.phoenix.trace.util.NullSpan) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Cell(org.apache.hadoop.hbase.Cell) Pair(org.apache.hadoop.hbase.util.Pair) ImmutableBytesPtr(org.apache.phoenix.hbase.index.util.ImmutableBytesPtr) TraceScope(org.apache.htrace.TraceScope) Durability(org.apache.hadoop.hbase.client.Durability) ReplayWrite(org.apache.phoenix.coprocessor.BaseScannerRegionObserver.ReplayWrite) Mutation(org.apache.hadoop.hbase.client.Mutation) IndexedKeyValue(org.apache.phoenix.hbase.index.wal.IndexedKeyValue)


