Search in sources :

Example 11 with WALEdit

Use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project hbase by apache.

The class HRegion, method doPreBatchMutateHook.

private void doPreBatchMutateHook(BatchOperation<?> batchOp) throws IOException {
    /* Run coprocessor pre hook outside of locks to avoid deadlock */
    WALEdit walEdit = new WALEdit();
    if (coprocessorHost != null) {
        for (int i = 0; i < batchOp.operations.length; i++) {
            Mutation m = batchOp.getMutation(i);
            if (m instanceof Put) {
                if (coprocessorHost.prePut((Put) m, walEdit, m.getDurability())) {
                    // pre hook says skip this Put
                    // mark as success and skip in doMiniBatchMutation
                    batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
                }
            } else if (m instanceof Delete) {
                Delete curDel = (Delete) m;
                if (curDel.getFamilyCellMap().isEmpty()) {
                    // handle deleting a row case
                    prepareDelete(curDel);
                }
                if (coprocessorHost.preDelete(curDel, walEdit, m.getDurability())) {
                    // pre hook says skip this Delete
                    // mark as success and skip in doMiniBatchMutation
                    batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
                }
            } else {
                // In case of passing Append mutations along with the Puts and Deletes in batchMutate
                // mark the operation return code as failure so that it will not be considered in
                // the doMiniBatchMutation
                batchOp.retCodeDetails[i] = new OperationStatus(OperationStatusCode.FAILURE, "Put/Delete mutations only supported in batchMutate() now");
            }
            if (!walEdit.isEmpty()) {
                batchOp.walEditsFromCoprocessors[i] = walEdit;
                walEdit = new WALEdit();
            }
        }
    }
}
Also used : Delete(org.apache.hadoop.hbase.client.Delete) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Mutation(org.apache.hadoop.hbase.client.Mutation) Put(org.apache.hadoop.hbase.client.Put)
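
The hook above builds one WALEdit per operation and only attaches it when the coprocessor actually added cells. Below is a minimal sketch of that pattern, assuming the HBase 1.x/2.x-era WALEdit API shown in the example; the row, family, and qualifier values are placeholders.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;

// Hand a fresh, empty WALEdit to a hook and keep it only if the hook appended cells,
// mirroring the isEmpty() check in doPreBatchMutateHook above.
static WALEdit buildHookEdit() {
    WALEdit walEdit = new WALEdit();
    // a pre-hook might append a marker cell like this
    walEdit.add(new KeyValue(Bytes.toBytes("row1"),
        Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("marker")));
    return walEdit.isEmpty() ? null : walEdit;  // null means the hook added nothing
}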

Example 12 with WALEdit

Use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project hbase by apache.

The class HRegion, method replayRecoveredEdits.

/*
   * @param edits File of recovered edits.
   * @param maxSeqIdInStores Maximum sequenceid found in each store.  Edits in wal
   * must be larger than this to be replayed for each store.
   * @param reporter
   * @return the sequence id of the last edit added to this region out of the
   * recovered edits log or <code>minSeqId</code> if nothing added from editlogs.
   * @throws IOException
   */
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter) throws IOException {
    String msg = "Replaying edits from " + edits;
    LOG.info(msg);
    MonitoredTask status = TaskMonitor.get().createStatus(msg);
    FileSystem fs = this.fs.getFileSystem();
    status.setStatus("Opening recovered edits");
    WAL.Reader reader = null;
    try {
        reader = WALFactory.createReader(fs, edits, conf);
        long currentEditSeqId = -1;
        long currentReplaySeqId = -1;
        long firstSeqIdInLog = -1;
        long skippedEdits = 0;
        long editsCount = 0;
        long intervalEdits = 0;
        WAL.Entry entry;
        HStore store = null;
        boolean reported_once = false;
        ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
        try {
            // How many edits seen before we check elapsed time
            int interval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
            // How often to send a progress report (default 1/2 master timeout)
            int period = this.conf.getInt("hbase.hstore.report.period", 300000);
            long lastReport = EnvironmentEdgeManager.currentTime();
            if (coprocessorHost != null) {
                coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
            }
            while ((entry = reader.next()) != null) {
                WALKey key = entry.getKey();
                WALEdit val = entry.getEdit();
                // ng is null in some tests or when nonces are disabled
                if (ng != null) {
                    ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
                }
                if (reporter != null) {
                    intervalEdits += val.size();
                    if (intervalEdits >= interval) {
                        // Number of edits interval reached
                        intervalEdits = 0;
                        long cur = EnvironmentEdgeManager.currentTime();
                        if (lastReport + period <= cur) {
                            status.setStatus("Replaying edits..." + " skipped=" + skippedEdits + " edits=" + editsCount);
                            // Timeout reached
                            if (!reporter.progress()) {
                                msg = "Progressable reporter failed, stopping replay";
                                LOG.warn(msg);
                                status.abort(msg);
                                throw new IOException(msg);
                            }
                            reported_once = true;
                            lastReport = cur;
                        }
                    }
                }
                if (firstSeqIdInLog == -1) {
                    firstSeqIdInLog = key.getLogSeqNum();
                }
                if (currentEditSeqId > key.getLogSeqNum()) {
                    // when this condition is true, it means we have a serious defect because we need to
                    // maintain increasing SeqId for WAL edits per region
                    LOG.error(getRegionInfo().getEncodedName() + " : " + "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key + "; edit=" + val);
                } else {
                    currentEditSeqId = key.getLogSeqNum();
                }
                currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ? key.getOrigLogSeqNum() : currentEditSeqId;
                // Start coprocessor replay here. The coprocessor is run for each WALEdit
                // instead of a KeyValue.
                if (coprocessorHost != null) {
                    status.setStatus("Running pre-WAL-restore hook in coprocessors");
                    if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
                        // if bypass this wal entry, ignore it ...
                        continue;
                    }
                }
                boolean checkRowWithinBoundary = false;
                // Check this edit is for this region.
                if (!Bytes.equals(key.getEncodedRegionName(), this.getRegionInfo().getEncodedNameAsBytes())) {
                    checkRowWithinBoundary = true;
                }
                boolean flush = false;
                MemstoreSize memstoreSize = new MemstoreSize();
                for (Cell cell : val.getCells()) {
                    // Skip the special METACOLUMN info such as HBASE::CACHEFLUSH entries
                    if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
                        // if region names don't match, skip replaying the compaction marker
                        if (!checkRowWithinBoundary) {
                            //this is a special edit, we should handle it
                            CompactionDescriptor compaction = WALEdit.getCompaction(cell);
                            if (compaction != null) {
                                //replay the compaction
                                replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
                            }
                        }
                        skippedEdits++;
                        continue;
                    }
                    // Figure which store the edit is meant for.
                    if (store == null || !CellUtil.matchingFamily(cell, store.getFamily().getName())) {
                        store = getHStore(cell);
                    }
                    if (store == null) {
                        // This should never happen.  Perhaps schema was changed between
                        // crash and redeploy?
                        LOG.warn("No family for " + cell);
                        skippedEdits++;
                        continue;
                    }
                    if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
                        LOG.warn("Row of " + cell + " is not within region boundary");
                        skippedEdits++;
                        continue;
                    }
                    // Now, figure if we should skip this edit.
                    if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily().getName())) {
                        skippedEdits++;
                        continue;
                    }
                    CellUtil.setSequenceId(cell, currentReplaySeqId);
                    restoreEdit(store, cell, memstoreSize);
                    editsCount++;
                }
                if (this.rsAccounting != null) {
                    rsAccounting.addRegionReplayEditsSize(getRegionInfo().getRegionName(), memstoreSize);
                }
                flush = isFlushSize(this.addAndGetMemstoreSize(memstoreSize));
                if (flush) {
                    internalFlushcache(null, currentEditSeqId, stores.values(), status, false);
                }
                if (coprocessorHost != null) {
                    coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
                }
            }
            if (coprocessorHost != null) {
                coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
            }
        } catch (EOFException eof) {
            Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
            msg = "EnLongAddered EOF. Most likely due to Master failure during " + "wal splitting, so we have this data in another edit.  " + "Continuing, but renaming " + edits + " as " + p;
            LOG.warn(msg, eof);
            status.abort(msg);
        } catch (IOException ioe) {
            // If the IOE resulted from bad file format,
            // then this problem is idempotent and retrying won't help
            if (ioe.getCause() instanceof ParseException) {
                Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
                msg = "File corruption enLongAddered!  " + "Continuing, but renaming " + edits + " as " + p;
                LOG.warn(msg, ioe);
                status.setStatus(msg);
            } else {
                status.abort(StringUtils.stringifyException(ioe));
                // other IO errors may be transient (bad network connection,
                // checksum exception on one datanode, etc).  throw & retry
                throw ioe;
            }
        }
        if (reporter != null && !reported_once) {
            reporter.progress();
        }
        msg = "Applied " + editsCount + ", skipped " + skippedEdits + ", firstSequenceIdInLog=" + firstSeqIdInLog + ", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
        status.markComplete(msg);
        LOG.debug(msg);
        return currentEditSeqId;
    } finally {
        status.cleanup();
        if (reader != null) {
            reader.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) WAL(org.apache.hadoop.hbase.wal.WAL) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) FileSystem(org.apache.hadoop.fs.FileSystem) EOFException(java.io.EOFException) ParseException(java.text.ParseException) CompactionDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor) Cell(org.apache.hadoop.hbase.Cell) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask)
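
replayRecoveredEdits drives a WAL.Reader over a recovered-edits file and inspects every WALEdit's cells. A stripped-down sketch of that read loop, assuming the same WALFactory/WAL.Reader API used above; the method name and its arguments are illustrative.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;

// Walk a WAL file and count user cells, skipping METAFAMILY marker edits
// (compaction/flush descriptors) the same way replayRecoveredEdits does.
static void summarizeEdits(FileSystem fs, Path editsFile, Configuration conf) throws IOException {
    long userCells = 0, metaCells = 0;
    try (WAL.Reader reader = WALFactory.createReader(fs, editsFile, conf)) {
        WAL.Entry entry;
        while ((entry = reader.next()) != null) {
            WALEdit edit = entry.getEdit();
            for (Cell cell : edit.getCells()) {
                if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
                    metaCells++;
                } else {
                    userCells++;
                }
            }
        }
    }
    System.out.println("user cells=" + userCells + ", meta cells skipped=" + metaCells);
}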

Example 13 with WALEdit

Use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.

The class SystemCatalogWALEntryFilterIT, method testOtherTablesAutoPass.

@Test
public void testOtherTablesAutoPass() throws Exception {
    // Cell is nonsense but we should auto pass because the table name's not System.Catalog
    WAL.Entry entry = new WAL.Entry(new WALKey(REGION, TableName.valueOf(TestUtil.ENTITY_HISTORY_TABLE_NAME)), new WALEdit());
    entry.getEdit().add(CellUtil.createCell(Bytes.toBytes("foo")));
    SystemCatalogWALEntryFilter filter = new SystemCatalogWALEntryFilter();
    Assert.assertEquals(1, filter.filter(entry).getEdit().size());
}
Also used : WALKey(org.apache.hadoop.hbase.wal.WALKey) WAL(org.apache.hadoop.hbase.wal.WAL) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Test(org.junit.Test)

Example 14 with WALEdit

Use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.

The class SystemCatalogWALEntryFilterIT, method testSystemCatalogWALEntryFilter.

@Test
public void testSystemCatalogWALEntryFilter() throws Exception {
    // now create WAL.Entry objects that refer to cells in those view rows in System.Catalog
    Get tenantViewGet = getTenantViewGet(catalogTable, TENANT_BYTES, TENANT_VIEW_NAME);
    Get nonTenantViewGet = getTenantViewGet(catalogTable, DEFAULT_TENANT_BYTES, NONTENANT_VIEW_NAME);
    Get tenantLinkGet = getParentChildLinkGet(catalogTable, TENANT_BYTES, TENANT_VIEW_NAME);
    Get nonTenantLinkGet = getParentChildLinkGet(catalogTable, DEFAULT_TENANT_BYTES, NONTENANT_VIEW_NAME);
    WAL.Entry nonTenantViewEntry = getEntry(systemCatalogTableName, nonTenantViewGet);
    WAL.Entry tenantViewEntry = getEntry(systemCatalogTableName, tenantViewGet);
    WAL.Entry nonTenantLinkEntry = getEntry(systemCatalogTableName, nonTenantLinkGet);
    WAL.Entry tenantLinkEntry = getEntry(systemCatalogTableName, tenantLinkGet);
    // verify that the tenant view WAL.Entry passes the filter and the non-tenant view does not
    SystemCatalogWALEntryFilter filter = new SystemCatalogWALEntryFilter();
    Assert.assertNull(filter.filter(nonTenantViewEntry));
    WAL.Entry filteredTenantEntry = filter.filter(tenantViewEntry);
    Assert.assertNotNull("Tenant view was filtered when it shouldn't be!", filteredTenantEntry);
    Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(tenantViewEntry).getEdit().size());
    // now check that a WAL.Entry with cells from both a tenant and a non-tenant
    // catalog row only allow the tenant cells through
    WALEdit comboEdit = new WALEdit();
    comboEdit.getCells().addAll(nonTenantViewEntry.getEdit().getCells());
    comboEdit.getCells().addAll(tenantViewEntry.getEdit().getCells());
    WAL.Entry comboEntry = new WAL.Entry(walKey, comboEdit);
    Assert.assertEquals(tenantViewEntry.getEdit().size() + nonTenantViewEntry.getEdit().size(), comboEntry.getEdit().size());
    Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(comboEntry).getEdit().size());
    // now check that the parent-child links (which have the tenant_id of the view's parent,
    // but are a part of the view's metadata) are migrated in the tenant case
    // but not the non-tenant. The view's tenant_id is in the System.Catalog.COLUMN_NAME field
    Assert.assertNull("Non-tenant parent-child link was not filtered " + "when it should be!", filter.filter(nonTenantLinkEntry));
    Assert.assertNotNull("Tenant parent-child link was filtered when it should not be!", filter.filter(tenantLinkEntry));
    Assert.assertEquals(tenantLinkEntry.getEdit().size(), filter.filter(tenantLinkEntry).getEdit().size());
    // add the parent-child link to the tenant view WAL entry,
    // since they'll usually be together and they both need to
    // be replicated
    tenantViewEntry.getEdit().getCells().addAll(tenantLinkEntry.getEdit().getCells());
    Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(tenantViewEntry).getEdit().size());
}
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Test(org.junit.Test)
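
SystemCatalogWALEntryFilter is a replication WALEntryFilter, so the pattern these two tests exercise (return null to drop an entry, return the entry to pass it) generalizes. Below is a minimal sketch of a table-scoped filter in the same spirit, assuming the HBase 1.x-era WALKey#getTablename() accessor; a real filter would usually prune individual cells rather than drop whole entries.

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.replication.WALEntryFilter;
import org.apache.hadoop.hbase.wal.WAL;

// Drop replication entries for one table, pass everything else through untouched.
public class SkipTableWALEntryFilter implements WALEntryFilter {
    private final TableName skipped;

    public SkipTableWALEntryFilter(String tableName) {
        this.skipped = TableName.valueOf(tableName);
    }

    @Override
    public WAL.Entry filter(WAL.Entry entry) {
        if (skipped.equals(entry.getKey().getTablename())) {
            return null;   // null drops the entry from the replication stream
        }
        return entry;      // anything else auto-passes, as the tests above expect
    }
}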

Example 15 with WALEdit

Use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.

The class Indexer, method preBatchMutateWithExceptions.

public void preBatchMutateWithExceptions(ObserverContext<RegionCoprocessorEnvironment> c, MiniBatchOperationInProgress<Mutation> miniBatchOp) throws Throwable {
    // first group all the updates for a single row into a single update to be processed
    Map<ImmutableBytesPtr, MultiMutation> mutationsMap = new HashMap<ImmutableBytesPtr, MultiMutation>();
    Durability defaultDurability = Durability.SYNC_WAL;
    if (c.getEnvironment().getRegion() != null) {
        defaultDurability = c.getEnvironment().getRegion().getTableDesc().getDurability();
        defaultDurability = (defaultDurability == Durability.USE_DEFAULT) ? Durability.SYNC_WAL : defaultDurability;
    }
    /*
       * Exclusively lock all rows so we get a consistent read
       * while determining the index updates
       */
    BatchMutateContext context = new BatchMutateContext();
    setBatchMutateContext(c, context);
    Durability durability = Durability.SKIP_WAL;
    boolean copyMutations = false;
    for (int i = 0; i < miniBatchOp.size(); i++) {
        Mutation m = miniBatchOp.getOperation(i);
        if (this.builder.isAtomicOp(m)) {
            miniBatchOp.setOperationStatus(i, IGNORE);
            continue;
        }
        if (this.builder.isEnabled(m)) {
            context.rowLocks.add(lockManager.lockRow(m.getRow(), rowLockWaitDuration));
            Durability effectiveDurablity = (m.getDurability() == Durability.USE_DEFAULT) ? defaultDurability : m.getDurability();
            if (effectiveDurablity.ordinal() > durability.ordinal()) {
                durability = effectiveDurablity;
            }
            // Track whether or not we need to copy the mutations: a repeated row key
            // means we'll have to merge mutations for the same row below
            ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
            if (mutationsMap.containsKey(row)) {
                copyMutations = true;
            } else {
                mutationsMap.put(row, null);
            }
        }
    }
    // early exit if it turns out we don't have any edits
    if (mutationsMap.isEmpty()) {
        return;
    }
    // If we're copying the mutations, the merged MultiMutations in mutationsMap become the
    // batch handed to the index builder; otherwise we collect the original mutations as-is
    Collection<Mutation> originalMutations;
    Collection<? extends Mutation> mutations;
    if (copyMutations) {
        originalMutations = null;
        mutations = mutationsMap.values();
    } else {
        originalMutations = Lists.newArrayListWithExpectedSize(mutationsMap.size());
        mutations = originalMutations;
    }
    Mutation firstMutation = miniBatchOp.getOperation(0);
    ReplayWrite replayWrite = this.builder.getReplayWrite(firstMutation);
    boolean resetTimeStamp = replayWrite == null;
    long now = EnvironmentEdgeManager.currentTimeMillis();
    byte[] byteNow = Bytes.toBytes(now);
    for (int i = 0; i < miniBatchOp.size(); i++) {
        Mutation m = miniBatchOp.getOperation(i);
        // only process mutations that weren't marked IGNORE above and that the index builder handles
        if (miniBatchOp.getOperationStatus(i) != IGNORE && this.builder.isEnabled(m)) {
            if (resetTimeStamp) {
                // reset the timestamps of the data table mutations so they don't overlap
                // (overlapping timestamps can cause index
                // inconsistencies as this case isn't handled correctly currently).
                for (List<Cell> family : m.getFamilyCellMap().values()) {
                    List<KeyValue> familyKVs = KeyValueUtil.ensureKeyValues(family);
                    for (KeyValue kv : familyKVs) {
                        setTimeStamp(kv, byteNow);
                    }
                }
            }
            // No need to write the data table mutations when we're only rebuilding
            // the index as they're already written and just being replayed.
            if (replayWrite == ReplayWrite.INDEX_ONLY) {
                miniBatchOp.setOperationStatus(i, NOWRITE);
            }
            // Only copy mutations when the batch has duplicate rows (since we may have both a
            // Put and a Delete mutation for the same row).
            if (copyMutations) {
                // Add the mutation to the batch set
                ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
                MultiMutation stored = mutationsMap.get(row);
                // we haven't seen this row before, so add it
                if (stored == null) {
                    stored = new MultiMutation(row);
                    mutationsMap.put(row, stored);
                }
                stored.addAll(m);
            } else {
                originalMutations.add(m);
            }
        }
    }
    // dump all the index updates into a single WAL. They will get combined in the end anyways, so
    // don't worry which one we get
    WALEdit edit = miniBatchOp.getWalEdit(0);
    if (edit == null) {
        edit = new WALEdit();
        miniBatchOp.setWalEdit(0, edit);
    }
    if (copyMutations || replayWrite != null) {
        mutations = IndexManagementUtil.flattenMutationsByTimestamp(mutations);
    }
    // get the current span, or just use a null-span to avoid a bunch of if statements
    try (TraceScope scope = Trace.startSpan("Starting to build index updates")) {
        Span current = scope.getSpan();
        if (current == null) {
            current = NullSpan.INSTANCE;
        }
        long start = EnvironmentEdgeManager.currentTimeMillis();
        // get the index updates for all elements in this batch
        Collection<Pair<Mutation, byte[]>> indexUpdates = this.builder.getIndexUpdate(miniBatchOp, mutations);
        long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
        if (duration >= slowIndexPrepareThreshold) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(getCallTooSlowMessage("indexPrepare", duration, slowIndexPrepareThreshold));
            }
            metricSource.incrementNumSlowIndexPrepareCalls();
        }
        metricSource.updateIndexPrepareTime(duration);
        current.addTimelineAnnotation("Built index updates, doing preStep");
        TracingUtils.addAnnotation(current, "index update count", indexUpdates.size());
        byte[] tableName = c.getEnvironment().getRegion().getTableDesc().getTableName().getName();
        Iterator<Pair<Mutation, byte[]>> indexUpdatesItr = indexUpdates.iterator();
        List<Mutation> localUpdates = new ArrayList<Mutation>(indexUpdates.size());
        while (indexUpdatesItr.hasNext()) {
            Pair<Mutation, byte[]> next = indexUpdatesItr.next();
            if (Bytes.compareTo(next.getSecond(), tableName) == 0) {
                localUpdates.add(next.getFirst());
                indexUpdatesItr.remove();
            }
        }
        if (!localUpdates.isEmpty()) {
            miniBatchOp.addOperationsFromCP(0, localUpdates.toArray(new Mutation[localUpdates.size()]));
        }
        if (!indexUpdates.isEmpty()) {
            context.indexUpdates = indexUpdates;
            // write index updates to WAL
            if (durability != Durability.SKIP_WAL) {
                // we have all the WAL durability, so we just update the WAL entry and move on
                for (Pair<Mutation, byte[]> entry : indexUpdates) {
                    edit.add(new IndexedKeyValue(entry.getSecond(), entry.getFirst()));
                }
            }
        }
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) IndexedKeyValue(org.apache.phoenix.hbase.index.wal.IndexedKeyValue) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Span(org.apache.htrace.Span) NullSpan(org.apache.phoenix.trace.util.NullSpan) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) Cell(org.apache.hadoop.hbase.Cell) Pair(org.apache.hadoop.hbase.util.Pair) ImmutableBytesPtr(org.apache.phoenix.hbase.index.util.ImmutableBytesPtr) TraceScope(org.apache.htrace.TraceScope) Durability(org.apache.hadoop.hbase.client.Durability) ReplayWrite(org.apache.phoenix.coprocessor.BaseScannerRegionObserver.ReplayWrite) Mutation(org.apache.hadoop.hbase.client.Mutation) IndexedKeyValue(org.apache.phoenix.hbase.index.wal.IndexedKeyValue)
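
One detail in preBatchMutateWithExceptions that is easy to miss is how the WAL durability for the index updates is chosen: the strongest durability requested by any mutation in the batch wins, with USE_DEFAULT resolved against the table's default first. A small sketch of just that escalation logic, relying only on the ordering of HBase's Durability enum (SKIP_WAL < ASYNC_WAL < SYNC_WAL < FSYNC_WAL); the helper name is illustrative.

import java.util.List;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Mutation;

// Pick the strongest durability requested by any mutation in the batch,
// resolving USE_DEFAULT to the table's configured default first.
static Durability strongestDurability(List<Mutation> batch, Durability tableDefault) {
    Durability result = Durability.SKIP_WAL;
    for (Mutation m : batch) {
        Durability effective = (m.getDurability() == Durability.USE_DEFAULT)
            ? tableDefault : m.getDurability();
        if (effective.ordinal() > result.ordinal()) {
            result = effective;
        }
    }
    return result;
}

If the result is anything other than SKIP_WAL, the index updates are appended to the batch's WALEdit as IndexedKeyValue cells, exactly as the tail of the method above does.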

Aggregations

WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit) 29
WALKey (org.apache.hadoop.hbase.wal.WALKey) 13
Cell (org.apache.hadoop.hbase.Cell) 10
WAL (org.apache.hadoop.hbase.wal.WAL) 9
KeyValue (org.apache.hadoop.hbase.KeyValue) 8
Test (org.junit.Test) 8
ArrayList (java.util.ArrayList) 7
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo) 7
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor) 7
Put (org.apache.hadoop.hbase.client.Put) 5
IOException (java.io.IOException) 4
List (java.util.List) 4
TreeMap (java.util.TreeMap) 4
AtomicLong (java.util.concurrent.atomic.AtomicLong) 4
Path (org.apache.hadoop.fs.Path) 4
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor) 4
TableName (org.apache.hadoop.hbase.TableName) 4
Mutation (org.apache.hadoop.hbase.client.Mutation) 4
Entry (org.apache.hadoop.hbase.wal.WAL.Entry) 4
HashMap (java.util.HashMap) 3