use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project hbase by apache.
the class HRegion method doPreBatchMutateHook.
private void doPreBatchMutateHook(BatchOperation<?> batchOp) throws IOException {
/* Run coprocessor pre hook outside of locks to avoid deadlock */
WALEdit walEdit = new WALEdit();
if (coprocessorHost != null) {
for (int i = 0; i < batchOp.operations.length; i++) {
Mutation m = batchOp.getMutation(i);
if (m instanceof Put) {
if (coprocessorHost.prePut((Put) m, walEdit, m.getDurability())) {
// pre hook says skip this Put
// mark as success and skip in doMiniBatchMutation
batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
}
} else if (m instanceof Delete) {
Delete curDel = (Delete) m;
if (curDel.getFamilyCellMap().isEmpty()) {
// handle deleting a row case
prepareDelete(curDel);
}
if (coprocessorHost.preDelete(curDel, walEdit, m.getDurability())) {
// pre hook says skip this Delete
// mark as success and skip in doMiniBatchMutation
batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
}
} else {
// In case of passing Append mutations along with the Puts and Deletes in batchMutate
// mark the operation return code as failure so that it will not be considered in
// the doMiniBatchMutation
batchOp.retCodeDetails[i] = new OperationStatus(OperationStatusCode.FAILURE, "Put/Delete mutations only supported in batchMutate() now");
}
if (!walEdit.isEmpty()) {
batchOp.walEditsFromCoprocessors[i] = walEdit;
walEdit = new WALEdit();
}
}
}
}
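The boolean returned by coprocessorHost.prePut(...) and coprocessorHost.preDelete(...) above is true when a registered observer asks to bypass the mutation. A minimal sketch of such an observer, assuming the pre-2.0 coprocessor API (BaseRegionObserver plus ObserverContext.bypass()); the class name and the "tmp:" row-prefix rule are illustrative assumptions, not HBase code:
import java.io.IOException;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
public class SkipMarkedPutsObserver extends BaseRegionObserver {
@Override
public void prePut(ObserverContext<RegionCoprocessorEnvironment> ctx, Put put, WALEdit edit, Durability durability) throws IOException {
// Illustrative rule: drop any Put whose row key starts with a "tmp:" prefix.
if (Bytes.startsWith(put.getRow(), Bytes.toBytes("tmp:"))) {
// bypass() makes coprocessorHost.prePut(...) return true, so doPreBatchMutateHook
// marks this slot SUCCESS and doMiniBatchMutation skips it.
ctx.bypass();
}
}
}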
use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project hbase by apache.
the class HRegion method replayRecoveredEdits.
/*
* @param edits File of recovered edits.
* @param maxSeqIdInStores Maximum sequenceid found in each store. Edits in wal
* must be larger than this to be replayed for each store.
* @param reporter CancelableProgressable used to report progress while replaying edits; may be null.
* @return the sequence id of the last edit added to this region out of the
* recovered edits log, or -1 if nothing was added from the edit logs.
* @throws IOException
*/
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter) throws IOException {
String msg = "Replaying edits from " + edits;
LOG.info(msg);
MonitoredTask status = TaskMonitor.get().createStatus(msg);
FileSystem fs = this.fs.getFileSystem();
status.setStatus("Opening recovered edits");
WAL.Reader reader = null;
try {
reader = WALFactory.createReader(fs, edits, conf);
long currentEditSeqId = -1;
long currentReplaySeqId = -1;
long firstSeqIdInLog = -1;
long skippedEdits = 0;
long editsCount = 0;
long intervalEdits = 0;
WAL.Entry entry;
HStore store = null;
boolean reported_once = false;
ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
try {
// How many edits seen before we check elapsed time
int interval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
// How often to send a progress report (default 1/2 master timeout)
int period = this.conf.getInt("hbase.hstore.report.period", 300000);
long lastReport = EnvironmentEdgeManager.currentTime();
if (coprocessorHost != null) {
coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
}
while ((entry = reader.next()) != null) {
WALKey key = entry.getKey();
WALEdit val = entry.getEdit();
if (ng != null) {
// ng may be null in some tests or when nonces are disabled; when present, record nonces seen in the WAL
ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
}
if (reporter != null) {
intervalEdits += val.size();
if (intervalEdits >= interval) {
// Number of edits interval reached
intervalEdits = 0;
long cur = EnvironmentEdgeManager.currentTime();
if (lastReport + period <= cur) {
status.setStatus("Replaying edits..." + " skipped=" + skippedEdits + " edits=" + editsCount);
// Timeout reached
if (!reporter.progress()) {
msg = "Progressable reporter failed, stopping replay";
LOG.warn(msg);
status.abort(msg);
throw new IOException(msg);
}
reported_once = true;
lastReport = cur;
}
}
}
if (firstSeqIdInLog == -1) {
firstSeqIdInLog = key.getLogSeqNum();
}
if (currentEditSeqId > key.getLogSeqNum()) {
// when this condition is true, it means we have a serious defect because we need to
// maintain increasing SeqId for WAL edits per region
LOG.error(getRegionInfo().getEncodedName() + " : " + "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key + "; edit=" + val);
} else {
currentEditSeqId = key.getLogSeqNum();
}
currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ? key.getOrigLogSeqNum() : currentEditSeqId;
// Start coprocessor replay here. The coprocessor is for each WALEdit instead of a KeyValue.
if (coprocessorHost != null) {
status.setStatus("Running pre-WAL-restore hook in coprocessors");
if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
// if bypass this wal entry, ignore it ...
continue;
}
}
boolean checkRowWithinBoundary = false;
// Check this edit is for this region.
if (!Bytes.equals(key.getEncodedRegionName(), this.getRegionInfo().getEncodedNameAsBytes())) {
checkRowWithinBoundary = true;
}
boolean flush = false;
MemstoreSize memstoreSize = new MemstoreSize();
for (Cell cell : val.getCells()) {
// METACOLUMN info such as HBASE::CACHEFLUSH entries
if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
// if region names don't match, skip replaying the compaction marker
if (!checkRowWithinBoundary) {
//this is a special edit, we should handle it
CompactionDescriptor compaction = WALEdit.getCompaction(cell);
if (compaction != null) {
//replay the compaction
replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
}
}
skippedEdits++;
continue;
}
// Figure which store the edit is meant for.
if (store == null || !CellUtil.matchingFamily(cell, store.getFamily().getName())) {
store = getHStore(cell);
}
if (store == null) {
// This should never happen. Perhaps schema was changed between
// crash and redeploy?
LOG.warn("No family for " + cell);
skippedEdits++;
continue;
}
if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
LOG.warn("Row of " + cell + " is not within region boundary");
skippedEdits++;
continue;
}
// Now, figure if we should skip this edit.
if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily().getName())) {
skippedEdits++;
continue;
}
CellUtil.setSequenceId(cell, currentReplaySeqId);
restoreEdit(store, cell, memstoreSize);
editsCount++;
}
if (this.rsAccounting != null) {
rsAccounting.addRegionReplayEditsSize(getRegionInfo().getRegionName(), memstoreSize);
}
flush = isFlushSize(this.addAndGetMemstoreSize(memstoreSize));
if (flush) {
internalFlushcache(null, currentEditSeqId, stores.values(), status, false);
}
if (coprocessorHost != null) {
coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
}
}
if (coprocessorHost != null) {
coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
}
} catch (EOFException eof) {
Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
msg = "EnLongAddered EOF. Most likely due to Master failure during " + "wal splitting, so we have this data in another edit. " + "Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, eof);
status.abort(msg);
} catch (IOException ioe) {
// If the IOE resulted from a bad file format, then this problem is idempotent and retrying won't help
if (ioe.getCause() instanceof ParseException) {
Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
msg = "File corruption enLongAddered! " + "Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, ioe);
status.setStatus(msg);
} else {
status.abort(StringUtils.stringifyException(ioe));
// other IO errors may be transient (bad network connection, checksum exception on one datanode, etc), so throw & retry
throw ioe;
}
}
if (reporter != null && !reported_once) {
reporter.progress();
}
msg = "Applied " + editsCount + ", skipped " + skippedEdits + ", firstSequenceIdInLog=" + firstSeqIdInLog + ", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
status.markComplete(msg);
LOG.debug(msg);
return currentEditSeqId;
} finally {
status.cleanup();
if (reader != null) {
reader.close();
}
}
}
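The same reader loop can be reproduced in isolation, for example to inspect a recovered-edits file offline. A minimal standalone sketch, reusing only the APIs shown above (WALFactory.createReader, WAL.Reader.next, WALKey.getLogSeqNum); the class name and the fallback path are placeholders:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKey;
public class RecoveredEditsDump {
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
FileSystem fs = FileSystem.get(conf);
// Pass a real recovered.edits file as args[0]; the fallback path below is hypothetical.
Path edits = new Path(args.length > 0 ? args[0] : "/hbase/recovered.edits/0000000000000000010");
WAL.Reader reader = WALFactory.createReader(fs, edits, conf);
try {
WAL.Entry entry;
while ((entry = reader.next()) != null) {
WALKey key = entry.getKey();
WALEdit val = entry.getEdit();
System.out.println("seqId=" + key.getLogSeqNum() + " cells=" + val.size());
}
} finally {
reader.close();
}
}
}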
use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.
the class SystemCatalogWALEntryFilterIT method testOtherTablesAutoPass.
@Test
public void testOtherTablesAutoPass() throws Exception {
// Cell is nonsense but we should auto pass because the table name's not System.Catalog
WAL.Entry entry = new WAL.Entry(new WALKey(REGION, TableName.valueOf(TestUtil.ENTITY_HISTORY_TABLE_NAME)), new WALEdit());
entry.getEdit().add(CellUtil.createCell(Bytes.toBytes("foo")));
SystemCatalogWALEntryFilter filter = new SystemCatalogWALEntryFilter();
Assert.assertEquals(1, filter.filter(entry).getEdit().size());
}
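The contract exercised here comes from HBase replication's WALEntryFilter: returning an entry (possibly with some cells removed) passes it along, returning null drops it entirely. A minimal sketch of a custom filter under that contract; SkipOneTableFilter and the AUDIT_LOG table are illustrative assumptions, not Phoenix code:
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.replication.WALEntryFilter;
import org.apache.hadoop.hbase.wal.WAL;
public class SkipOneTableFilter implements WALEntryFilter {
// Hypothetical table whose edits should not be replicated.
private static final TableName SKIPPED = TableName.valueOf("AUDIT_LOG");
@Override
public WAL.Entry filter(WAL.Entry entry) {
// null means "drop this entry", mirroring the assertNull/assertNotNull checks in these tests.
return SKIPPED.equals(entry.getKey().getTablename()) ? null : entry;
}
}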
use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.
the class SystemCatalogWALEntryFilterIT method testSystemCatalogWALEntryFilter.
@Test
public void testSystemCatalogWALEntryFilter() throws Exception {
// now create WAL.Entry objects that refer to cells in those view rows in System.Catalog
Get tenantViewGet = getTenantViewGet(catalogTable, TENANT_BYTES, TENANT_VIEW_NAME);
Get nonTenantViewGet = getTenantViewGet(catalogTable, DEFAULT_TENANT_BYTES, NONTENANT_VIEW_NAME);
Get tenantLinkGet = getParentChildLinkGet(catalogTable, TENANT_BYTES, TENANT_VIEW_NAME);
Get nonTenantLinkGet = getParentChildLinkGet(catalogTable, DEFAULT_TENANT_BYTES, NONTENANT_VIEW_NAME);
WAL.Entry nonTenantViewEntry = getEntry(systemCatalogTableName, nonTenantViewGet);
WAL.Entry tenantViewEntry = getEntry(systemCatalogTableName, tenantViewGet);
WAL.Entry nonTenantLinkEntry = getEntry(systemCatalogTableName, nonTenantLinkGet);
WAL.Entry tenantLinkEntry = getEntry(systemCatalogTableName, tenantLinkGet);
// verify that the tenant view WAL.Entry passes the filter and the non-tenant view does not
SystemCatalogWALEntryFilter filter = new SystemCatalogWALEntryFilter();
Assert.assertNull(filter.filter(nonTenantViewEntry));
WAL.Entry filteredTenantEntry = filter.filter(tenantViewEntry);
Assert.assertNotNull("Tenant view was filtered when it shouldn't be!", filteredTenantEntry);
Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(tenantViewEntry).getEdit().size());
// now check that a WAL.Entry with cells from both a tenant and a non-tenant
// catalog row only allow the tenant cells through
WALEdit comboEdit = new WALEdit();
comboEdit.getCells().addAll(nonTenantViewEntry.getEdit().getCells());
comboEdit.getCells().addAll(tenantViewEntry.getEdit().getCells());
WAL.Entry comboEntry = new WAL.Entry(walKey, comboEdit);
Assert.assertEquals(tenantViewEntry.getEdit().size() + nonTenantViewEntry.getEdit().size(), comboEntry.getEdit().size());
Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(comboEntry).getEdit().size());
// now check that the parent-child links (which have the tenant_id of the view's parent,
// but are a part of the view's metadata) are migrated in the tenant case
// but not the non-tenant. The view's tenant_id is in the System.Catalog.COLUMN_NAME field
Assert.assertNull("Non-tenant parent-child link was not filtered " + "when it should be!", filter.filter(nonTenantLinkEntry));
Assert.assertNotNull("Tenant parent-child link was filtered when it should not be!", filter.filter(tenantLinkEntry));
Assert.assertEquals(tenantLinkEntry.getEdit().size(), filter.filter(tenantLinkEntry).getEdit().size());
// add the parent-child link to the tenant view WAL entry,
// since they'll usually be together and they both need to
// be replicated
tenantViewEntry.getEdit().getCells().addAll(tenantLinkEntry.getEdit().getCells());
Assert.assertEquals(tenantViewEntry.getEdit().size(), filter.filter(tenantViewEntry).getEdit().size());
}
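The getEntry(...) helper used above is not part of this excerpt. A hedged sketch of what such a helper plausibly does (run the Get against SYSTEM.CATALOG and wrap the returned cells in a WAL.Entry) follows; the method name getEntrySketch is an assumption, and it relies on the test class's existing imports, its REGION constant, and the catalogTable handle:
private WAL.Entry getEntrySketch(Table catalogTable, TableName tableName, Get get) throws IOException {
Result result = catalogTable.get(get);
WALEdit edit = new WALEdit();
for (Cell cell : result.rawCells()) {
// copy each catalog cell into the WAL edit
edit.add(cell);
}
// REGION is the encoded region name constant used in testOtherTablesAutoPass above
return new WAL.Entry(new WALKey(REGION, tableName), edit);
}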
use of org.apache.hadoop.hbase.regionserver.wal.WALEdit in project phoenix by apache.
the class Indexer method preBatchMutateWithExceptions.
public void preBatchMutateWithExceptions(ObserverContext<RegionCoprocessorEnvironment> c, MiniBatchOperationInProgress<Mutation> miniBatchOp) throws Throwable {
// first group all the updates for a single row into a single update to be processed
Map<ImmutableBytesPtr, MultiMutation> mutationsMap = new HashMap<ImmutableBytesPtr, MultiMutation>();
Durability defaultDurability = Durability.SYNC_WAL;
if (c.getEnvironment().getRegion() != null) {
defaultDurability = c.getEnvironment().getRegion().getTableDesc().getDurability();
defaultDurability = (defaultDurability == Durability.USE_DEFAULT) ? Durability.SYNC_WAL : defaultDurability;
}
/*
* Exclusively lock all rows so we get a consistent read
* while determining the index updates
*/
BatchMutateContext context = new BatchMutateContext();
setBatchMutateContext(c, context);
Durability durability = Durability.SKIP_WAL;
boolean copyMutations = false;
for (int i = 0; i < miniBatchOp.size(); i++) {
Mutation m = miniBatchOp.getOperation(i);
if (this.builder.isAtomicOp(m)) {
miniBatchOp.setOperationStatus(i, IGNORE);
continue;
}
if (this.builder.isEnabled(m)) {
context.rowLocks.add(lockManager.lockRow(m.getRow(), rowLockWaitDuration));
Durability effectiveDurability = (m.getDurability() == Durability.USE_DEFAULT) ? defaultDurability : m.getDurability();
if (effectiveDurability.ordinal() > durability.ordinal()) {
durability = effectiveDurability;
}
// Track whether or not we need to copy the mutations because the same row appears more than once in this batch
ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
if (mutationsMap.containsKey(row)) {
copyMutations = true;
} else {
mutationsMap.put(row, null);
}
}
}
// early exit if it turns out we don't have any edits
if (mutationsMap.isEmpty()) {
return;
}
// If we're copying the mutations, work off the combined per-row MultiMutations; otherwise collect the originals as-is
Collection<Mutation> originalMutations;
Collection<? extends Mutation> mutations;
if (copyMutations) {
originalMutations = null;
mutations = mutationsMap.values();
} else {
originalMutations = Lists.newArrayListWithExpectedSize(mutationsMap.size());
mutations = originalMutations;
}
Mutation firstMutation = miniBatchOp.getOperation(0);
ReplayWrite replayWrite = this.builder.getReplayWrite(firstMutation);
boolean resetTimeStamp = replayWrite == null;
long now = EnvironmentEdgeManager.currentTimeMillis();
byte[] byteNow = Bytes.toBytes(now);
for (int i = 0; i < miniBatchOp.size(); i++) {
Mutation m = miniBatchOp.getOperation(i);
// skip mutations that are already marked IGNORE or for which indexing isn't enabled; they go through the normal write path
if (miniBatchOp.getOperationStatus(i) != IGNORE && this.builder.isEnabled(m)) {
if (resetTimeStamp) {
// reset each cell's timestamp to the server's current time (skipped for WAL replay to avoid inconsistencies, as that case isn't handled correctly currently)
for (List<Cell> family : m.getFamilyCellMap().values()) {
List<KeyValue> familyKVs = KeyValueUtil.ensureKeyValues(family);
for (KeyValue kv : familyKVs) {
setTimeStamp(kv, byteNow);
}
}
}
// No need to write the table mutations when we're rebuilding the index as they're already written and just being replayed.
if (replayWrite == ReplayWrite.INDEX_ONLY) {
miniBatchOp.setOperationStatus(i, NOWRITE);
}
// Only copy the mutation when the same row appears more than once in the batch (e.g. both a Put and a Delete mutation for the same row).
if (copyMutations) {
// Add the mutation to the batch set
ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
MultiMutation stored = mutationsMap.get(row);
// we haven't seen this row before, so add it
if (stored == null) {
stored = new MultiMutation(row);
mutationsMap.put(row, stored);
}
stored.addAll(m);
} else {
originalMutations.add(m);
}
}
}
// dump all the index updates into a single WAL. They will get combined in the end anyways, so
// don't worry which one we get
WALEdit edit = miniBatchOp.getWalEdit(0);
if (edit == null) {
edit = new WALEdit();
miniBatchOp.setWalEdit(0, edit);
}
if (copyMutations || replayWrite != null) {
mutations = IndexManagementUtil.flattenMutationsByTimestamp(mutations);
}
// get the current span, or just use a null-span to avoid a bunch of if statements
try (TraceScope scope = Trace.startSpan("Starting to build index updates")) {
Span current = scope.getSpan();
if (current == null) {
current = NullSpan.INSTANCE;
}
long start = EnvironmentEdgeManager.currentTimeMillis();
// get the index updates for all elements in this batch
Collection<Pair<Mutation, byte[]>> indexUpdates = this.builder.getIndexUpdate(miniBatchOp, mutations);
long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
if (duration >= slowIndexPrepareThreshold) {
if (LOG.isDebugEnabled()) {
LOG.debug(getCallTooSlowMessage("indexPrepare", duration, slowIndexPrepareThreshold));
}
metricSource.incrementNumSlowIndexPrepareCalls();
}
metricSource.updateIndexPrepareTime(duration);
current.addTimelineAnnotation("Built index updates, doing preStep");
TracingUtils.addAnnotation(current, "index update count", indexUpdates.size());
byte[] tableName = c.getEnvironment().getRegion().getTableDesc().getTableName().getName();
Iterator<Pair<Mutation, byte[]>> indexUpdatesItr = indexUpdates.iterator();
List<Mutation> localUpdates = new ArrayList<Mutation>(indexUpdates.size());
while (indexUpdatesItr.hasNext()) {
Pair<Mutation, byte[]> next = indexUpdatesItr.next();
if (Bytes.compareTo(next.getSecond(), tableName) == 0) {
localUpdates.add(next.getFirst());
indexUpdatesItr.remove();
}
}
if (!localUpdates.isEmpty()) {
miniBatchOp.addOperationsFromCP(0, localUpdates.toArray(new Mutation[localUpdates.size()]));
}
if (!indexUpdates.isEmpty()) {
context.indexUpdates = indexUpdates;
// write index updates to WAL
if (durability != Durability.SKIP_WAL) {
// we have all the WAL durability, so we just update the WAL entry and move on
for (Pair<Mutation, byte[]> entry : indexUpdates) {
edit.add(new IndexedKeyValue(entry.getSecond(), entry.getFirst()));
}
}
}
}
}
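The durability handling at the top of this method escalates from SKIP_WAL to the strongest durability requested by any enabled mutation, treating USE_DEFAULT as the table's default. A small standalone sketch of that rule; the class and helper name effectiveBatchDurability are illustrative:
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Mutation;
public class DurabilityUtil {
static Durability effectiveBatchDurability(Iterable<? extends Mutation> batch, Durability tableDefault) {
Durability strongest = Durability.SKIP_WAL;
for (Mutation m : batch) {
// USE_DEFAULT defers to the table's configured durability, as in the Indexer code above
Durability d = (m.getDurability() == Durability.USE_DEFAULT) ? tableDefault : m.getDurability();
if (d.ordinal() > strongest.ordinal()) {
strongest = d;
}
}
return strongest;
}
}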