use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class RSRpcServices method replay.
/**
* Replay the given changes when distributedLogReplay WAL edits from a failed RS. The guarantee is
* that the given mutations will be durable on the receiving RS if this method returns without any
* exception.
* @param controller the RPC controller
* @param request the request
* @throws ServiceException
*/
@Override
@QosPriority(priority = HConstants.REPLAY_QOS)
public ReplicateWALEntryResponse replay(final RpcController controller, final ReplicateWALEntryRequest request) throws ServiceException {
long before = EnvironmentEdgeManager.currentTime();
CellScanner cells = ((HBaseRpcController) controller).cellScanner();
try {
checkOpen();
List<WALEntry> entries = request.getEntryList();
if (entries == null || entries.isEmpty()) {
// empty input
return ReplicateWALEntryResponse.newBuilder().build();
}
ByteString regionName = entries.get(0).getKey().getEncodedRegionName();
Region region = regionServer.getRegionByEncodedName(regionName.toStringUtf8());
RegionCoprocessorHost coprocessorHost = ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo()) ? region.getCoprocessorHost() : // do not invoke coprocessors if this is a secondary region replica
null;
List<Pair<WALKey, WALEdit>> walEntries = new ArrayList<>();
// Skip adding the edits to WAL if this is a secondary region replica
boolean isPrimary = RegionReplicaUtil.isDefaultReplica(region.getRegionInfo());
Durability durability = isPrimary ? Durability.USE_DEFAULT : Durability.SKIP_WAL;
for (WALEntry entry : entries) {
if (!regionName.equals(entry.getKey().getEncodedRegionName())) {
throw new NotServingRegionException("Replay request contains entries from multiple " + "regions. First region:" + regionName.toStringUtf8() + " , other region:" + entry.getKey().getEncodedRegionName());
}
if (regionServer.nonceManager != null && isPrimary) {
long nonceGroup = entry.getKey().hasNonceGroup() ? entry.getKey().getNonceGroup() : HConstants.NO_NONCE;
long nonce = entry.getKey().hasNonce() ? entry.getKey().getNonce() : HConstants.NO_NONCE;
regionServer.nonceManager.reportOperationFromWal(nonceGroup, nonce, entry.getKey().getWriteTime());
}
Pair<WALKey, WALEdit> walEntry = (coprocessorHost == null) ? null : new Pair<>();
List<WALSplitter.MutationReplay> edits = WALSplitter.getMutationsFromWALEntry(entry, cells, walEntry, durability);
if (coprocessorHost != null) {
// KeyValue.
if (coprocessorHost.preWALRestore(region.getRegionInfo(), walEntry.getFirst(), walEntry.getSecond())) {
// if bypass this log entry, ignore it ...
continue;
}
walEntries.add(walEntry);
}
if (edits != null && !edits.isEmpty()) {
long replaySeqId = (entry.getKey().hasOrigSequenceNumber()) ? entry.getKey().getOrigSequenceNumber() : entry.getKey().getLogSequenceNumber();
OperationStatus[] result = doReplayBatchOp(region, edits, replaySeqId);
// check if it's a partial success
for (int i = 0; result != null && i < result.length; i++) {
if (result[i] != OperationStatus.SUCCESS) {
throw new IOException(result[i].getExceptionMsg());
}
}
}
}
//sync wal at the end because ASYNC_WAL is used above
WAL wal = getWAL(region);
if (wal != null) {
wal.sync();
}
if (coprocessorHost != null) {
for (Pair<WALKey, WALEdit> entry : walEntries) {
coprocessorHost.postWALRestore(region.getRegionInfo(), entry.getFirst(), entry.getSecond());
}
}
return ReplicateWALEntryResponse.newBuilder().build();
} catch (IOException ie) {
throw new ServiceException(ie);
} finally {
if (regionServer.metricsRegionServer != null) {
regionServer.metricsRegionServer.updateReplay(EnvironmentEdgeManager.currentTime() - before);
}
}
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class HRegion method appendCurrentNonces.
private void appendCurrentNonces(final Mutation mutation, final boolean replay, final WALEdit walEdit, final long now, final long currentNonceGroup, final long currentNonce) throws IOException {
if (walEdit.isEmpty())
return;
if (!replay)
throw new IOException("Multiple nonces per batch and not in replay");
WALKey walKey = new WALKey(this.getRegionInfo().getEncodedNameAsBytes(), this.htableDescriptor.getTableName(), now, mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, this.getReplicationScope());
this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
// Complete the mvcc transaction started down in append else it will block others
this.mvcc.complete(walKey.getWriteEntry());
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class HRegion method replayRecoveredEdits.
/*
* @param edits File of recovered edits.
* @param maxSeqIdInStores Maximum sequenceid found in each store. Edits in wal
* must be larger than this to be replayed for each store.
* @param reporter
* @return the sequence id of the last edit added to this region out of the
* recovered edits log or <code>minSeqId</code> if nothing added from editlogs.
* @throws IOException
*/
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter) throws IOException {
String msg = "Replaying edits from " + edits;
LOG.info(msg);
MonitoredTask status = TaskMonitor.get().createStatus(msg);
FileSystem fs = this.fs.getFileSystem();
status.setStatus("Opening recovered edits");
WAL.Reader reader = null;
try {
reader = WALFactory.createReader(fs, edits, conf);
long currentEditSeqId = -1;
long currentReplaySeqId = -1;
long firstSeqIdInLog = -1;
long skippedEdits = 0;
long editsCount = 0;
long intervalEdits = 0;
WAL.Entry entry;
HStore store = null;
boolean reported_once = false;
ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
try {
// How many edits seen before we check elapsed time
int interval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
// How often to send a progress report (default 1/2 master timeout)
int period = this.conf.getInt("hbase.hstore.report.period", 300000);
long lastReport = EnvironmentEdgeManager.currentTime();
if (coprocessorHost != null) {
coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
}
while ((entry = reader.next()) != null) {
WALKey key = entry.getKey();
WALEdit val = entry.getEdit();
if (ng != null) {
// some test, or nonces disabled
ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
}
if (reporter != null) {
intervalEdits += val.size();
if (intervalEdits >= interval) {
// Number of edits interval reached
intervalEdits = 0;
long cur = EnvironmentEdgeManager.currentTime();
if (lastReport + period <= cur) {
status.setStatus("Replaying edits..." + " skipped=" + skippedEdits + " edits=" + editsCount);
// Timeout reached
if (!reporter.progress()) {
msg = "Progressable reporter failed, stopping replay";
LOG.warn(msg);
status.abort(msg);
throw new IOException(msg);
}
reported_once = true;
lastReport = cur;
}
}
}
if (firstSeqIdInLog == -1) {
firstSeqIdInLog = key.getLogSeqNum();
}
if (currentEditSeqId > key.getLogSeqNum()) {
// when this condition is true, it means we have a serious defect because we need to
// maintain increasing SeqId for WAL edits per region
LOG.error(getRegionInfo().getEncodedName() + " : " + "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key + "; edit=" + val);
} else {
currentEditSeqId = key.getLogSeqNum();
}
currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ? key.getOrigLogSeqNum() : currentEditSeqId;
// instead of a KeyValue.
if (coprocessorHost != null) {
status.setStatus("Running pre-WAL-restore hook in coprocessors");
if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
// if bypass this wal entry, ignore it ...
continue;
}
}
boolean checkRowWithinBoundary = false;
// Check this edit is for this region.
if (!Bytes.equals(key.getEncodedRegionName(), this.getRegionInfo().getEncodedNameAsBytes())) {
checkRowWithinBoundary = true;
}
boolean flush = false;
MemstoreSize memstoreSize = new MemstoreSize();
for (Cell cell : val.getCells()) {
// METACOLUMN info such as HBASE::CACHEFLUSH entries
if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
// if region names don't match, skipp replaying compaction marker
if (!checkRowWithinBoundary) {
//this is a special edit, we should handle it
CompactionDescriptor compaction = WALEdit.getCompaction(cell);
if (compaction != null) {
//replay the compaction
replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
}
}
skippedEdits++;
continue;
}
// Figure which store the edit is meant for.
if (store == null || !CellUtil.matchingFamily(cell, store.getFamily().getName())) {
store = getHStore(cell);
}
if (store == null) {
// This should never happen. Perhaps schema was changed between
// crash and redeploy?
LOG.warn("No family for " + cell);
skippedEdits++;
continue;
}
if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
LOG.warn("Row of " + cell + " is not within region boundary");
skippedEdits++;
continue;
}
// Now, figure if we should skip this edit.
if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily().getName())) {
skippedEdits++;
continue;
}
CellUtil.setSequenceId(cell, currentReplaySeqId);
restoreEdit(store, cell, memstoreSize);
editsCount++;
}
if (this.rsAccounting != null) {
rsAccounting.addRegionReplayEditsSize(getRegionInfo().getRegionName(), memstoreSize);
}
flush = isFlushSize(this.addAndGetMemstoreSize(memstoreSize));
if (flush) {
internalFlushcache(null, currentEditSeqId, stores.values(), status, false);
}
if (coprocessorHost != null) {
coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
}
}
if (coprocessorHost != null) {
coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
}
} catch (EOFException eof) {
Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
msg = "EnLongAddered EOF. Most likely due to Master failure during " + "wal splitting, so we have this data in another edit. " + "Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, eof);
status.abort(msg);
} catch (IOException ioe) {
// then this problem is idempotent and retrying won't help
if (ioe.getCause() instanceof ParseException) {
Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
msg = "File corruption enLongAddered! " + "Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, ioe);
status.setStatus(msg);
} else {
status.abort(StringUtils.stringifyException(ioe));
// checksum exception on one datanode, etc). throw & retry
throw ioe;
}
}
if (reporter != null && !reported_once) {
reporter.progress();
}
msg = "Applied " + editsCount + ", skipped " + skippedEdits + ", firstSequenceIdInLog=" + firstSeqIdInLog + ", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
status.markComplete(msg);
LOG.debug(msg);
return currentEditSeqId;
} finally {
status.cleanup();
if (reader != null) {
reader.close();
}
}
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class TestLogRollAbort method testLogRollAfterSplitStart.
/**
* Tests the case where a RegionServer enters a GC pause,
* comes back online after the master declared it dead and started to split.
* Want log rolling after a master split to fail. See HBASE-2312.
*/
@Test(timeout = 300000)
public void testLogRollAfterSplitStart() throws IOException {
LOG.info("Verify wal roll after split starts will fail.");
String logName = ServerName.valueOf("testLogRollAfterSplitStart", 16010, System.currentTimeMillis()).toString();
Path thisTestsDir = new Path(HBASELOGDIR, AbstractFSWALProvider.getWALDirectoryName(logName));
final WALFactory wals = new WALFactory(conf, null, logName);
try {
// put some entries in an WAL
TableName tableName = TableName.valueOf(this.getClass().getName());
HRegionInfo regioninfo = new HRegionInfo(tableName, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
final WAL log = wals.getWAL(regioninfo.getEncodedNameAsBytes(), regioninfo.getTable().getNamespace());
MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1);
final int total = 20;
for (int i = 0; i < total; i++) {
WALEdit kvs = new WALEdit();
kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor("column"));
NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
for (byte[] fam : htd.getFamiliesKeys()) {
scopes.put(fam, 0);
}
log.append(regioninfo, new WALKey(regioninfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis(), mvcc, scopes), kvs, true);
}
// Send the data to HDFS datanodes and close the HDFS writer
log.sync();
((AbstractFSWAL<?>) log).replaceWriter(((FSHLog) log).getOldPath(), null, null);
/* code taken from MasterFileSystem.getLogDirs(), which is called from MasterFileSystem.splitLog()
* handles RS shutdowns (as observed by the splitting process)
*/
// rename the directory so a rogue RS doesn't create more WALs
Path rsSplitDir = thisTestsDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
if (!fs.rename(thisTestsDir, rsSplitDir)) {
throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
}
LOG.debug("Renamed region directory: " + rsSplitDir);
LOG.debug("Processing the old log files.");
WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);
LOG.debug("Trying to roll the WAL.");
try {
log.rollWriter();
Assert.fail("rollWriter() did not throw any exception.");
} catch (IOException ioe) {
if (ioe.getCause() instanceof FileNotFoundException) {
LOG.info("Got the expected exception: ", ioe.getCause());
} else {
Assert.fail("Unexpected exception: " + ioe);
}
}
} finally {
wals.close();
if (fs.exists(thisTestsDir)) {
fs.delete(thisTestsDir, true);
}
}
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class AbstractTestWALReplay method testReplayEditsWrittenIntoWAL.
/**
* Create an HRegion with the result of a WAL split and test we only see the
* good edits
* @throws Exception
*/
@Test
public void testReplayEditsWrittenIntoWAL() throws Exception {
final TableName tableName = TableName.valueOf("testReplayEditsWrittenIntoWAL");
final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableName);
final Path basedir = FSUtils.getTableDir(hbaseRootDir, tableName);
deleteDir(basedir);
final HTableDescriptor htd = createBasic3FamilyHTD(tableName);
HRegion region2 = HBaseTestingUtility.createRegionAndWAL(hri, hbaseRootDir, this.conf, htd);
HBaseTestingUtility.closeRegionAndWAL(region2);
final WAL wal = createWAL(this.conf, hbaseRootDir, logName);
final byte[] rowName = tableName.getName();
final byte[] regionName = hri.getEncodedNameAsBytes();
// Add 1k to each family.
final int countPerFamily = 1000;
Set<byte[]> familyNames = new HashSet<>();
NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
for (byte[] fam : htd.getFamiliesKeys()) {
scopes.put(fam, 0);
}
for (HColumnDescriptor hcd : htd.getFamilies()) {
addWALEdits(tableName, hri, rowName, hcd.getName(), countPerFamily, ee, wal, htd, mvcc, scopes);
familyNames.add(hcd.getName());
}
// Add a cache flush, shouldn't have any effect
wal.startCacheFlush(regionName, familyNames);
wal.completeCacheFlush(regionName);
// Add an edit to another family, should be skipped.
WALEdit edit = new WALEdit();
long now = ee.currentTime();
edit.add(new KeyValue(rowName, Bytes.toBytes("another family"), rowName, now, rowName));
wal.append(hri, new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes), edit, true);
// Delete the c family to verify deletes make it over.
edit = new WALEdit();
now = ee.currentTime();
edit.add(new KeyValue(rowName, Bytes.toBytes("c"), null, now, KeyValue.Type.DeleteFamily));
wal.append(hri, new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes), edit, true);
// Sync.
wal.sync();
// Make a new conf and a new fs for the splitter to run on so we can take
// over old wal.
final Configuration newConf = HBaseConfiguration.create(this.conf);
User user = HBaseTestingUtility.getDifferentUser(newConf, ".replay.wal.secondtime");
user.runAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
runWALSplit(newConf);
FileSystem newFS = FileSystem.get(newConf);
// 100k seems to make for about 4 flushes during HRegion#initialize.
newConf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 100);
// Make a new wal for new region.
WAL newWal = createWAL(newConf, hbaseRootDir, logName);
final AtomicInteger flushcount = new AtomicInteger(0);
try {
final HRegion region = new HRegion(basedir, newWal, newFS, newConf, hri, htd, null) {
@Override
protected FlushResult internalFlushcache(final WAL wal, final long myseqid, final Collection<Store> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker) throws IOException {
LOG.info("InternalFlushCache Invoked");
FlushResult fs = super.internalFlushcache(wal, myseqid, storesToFlush, Mockito.mock(MonitoredTask.class), writeFlushWalMarker);
flushcount.incrementAndGet();
return fs;
}
};
// The seq id this region has opened up with
long seqid = region.initialize();
// The mvcc readpoint of from inserting data.
long writePoint = mvcc.getWritePoint();
// We flushed during init.
assertTrue("Flushcount=" + flushcount.get(), flushcount.get() > 0);
assertTrue((seqid - 1) == writePoint);
Get get = new Get(rowName);
Result result = region.get(get);
// Make sure we only see the good edits
assertEquals(countPerFamily * (htd.getFamilies().size() - 1), result.size());
region.close();
} finally {
newWal.close();
}
return null;
}
});
}
Aggregations