Use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.
The class HRegion, method replayRecoveredEdits.
/**
* @param edits File of recovered edits.
* @param maxSeqIdInStores Maximum sequence id found in each store. Edits in the WAL
* must be larger than this to be replayed for each store.
* @param reporter CancelableProgressable used to report replay progress periodically.
* @return the sequence id of the last edit added to this region out of the
* recovered edits log, or -1 if nothing was added from the edit logs.
* @throws IOException
*/
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter) throws IOException {
String msg = "Replaying edits from " + edits;
LOG.info(msg);
MonitoredTask status = TaskMonitor.get().createStatus(msg);
FileSystem fs = this.fs.getFileSystem();
status.setStatus("Opening recovered edits");
WAL.Reader reader = null;
try {
reader = WALFactory.createReader(fs, edits, conf);
long currentEditSeqId = -1;
long currentReplaySeqId = -1;
long firstSeqIdInLog = -1;
long skippedEdits = 0;
long editsCount = 0;
long intervalEdits = 0;
WAL.Entry entry;
HStore store = null;
boolean reported_once = false;
ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
try {
// How many edits seen before we check elapsed time
int interval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
// How often to send a progress report (default 1/2 master timeout)
int period = this.conf.getInt("hbase.hstore.report.period", 300000);
long lastReport = EnvironmentEdgeManager.currentTime();
if (coprocessorHost != null) {
coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
}
while ((entry = reader.next()) != null) {
WALKey key = entry.getKey();
WALEdit val = entry.getEdit();
// ng is null in some tests or when nonces are disabled
if (ng != null) {
ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
}
if (reporter != null) {
intervalEdits += val.size();
if (intervalEdits >= interval) {
// Number of edits interval reached
intervalEdits = 0;
long cur = EnvironmentEdgeManager.currentTime();
if (lastReport + period <= cur) {
status.setStatus("Replaying edits..." + " skipped=" + skippedEdits + " edits=" + editsCount);
// Timeout reached
if (!reporter.progress()) {
msg = "Progressable reporter failed, stopping replay";
LOG.warn(msg);
status.abort(msg);
throw new IOException(msg);
}
reported_once = true;
lastReport = cur;
}
}
}
if (firstSeqIdInLog == -1) {
firstSeqIdInLog = key.getLogSeqNum();
}
if (currentEditSeqId > key.getLogSeqNum()) {
// when this condition is true, it means we have a serious defect because we need to
// maintain increasing SeqId for WAL edits per region
LOG.error(getRegionInfo().getEncodedName() + " : " + "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key + "; edit=" + val);
} else {
currentEditSeqId = key.getLogSeqNum();
}
currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ? key.getOrigLogSeqNum() : currentEditSeqId;
// Start coprocessor replay here. The coprocessor is invoked per WALEdit instead of per KeyValue.
if (coprocessorHost != null) {
status.setStatus("Running pre-WAL-restore hook in coprocessors");
if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
// if bypass this wal entry, ignore it ...
continue;
}
}
boolean checkRowWithinBoundary = false;
// Check this edit is for this region.
if (!Bytes.equals(key.getEncodedRegionName(), this.getRegionInfo().getEncodedNameAsBytes())) {
checkRowWithinBoundary = true;
}
boolean flush = false;
MemstoreSize memstoreSize = new MemstoreSize();
for (Cell cell : val.getCells()) {
// Guard against replaying special METACOLUMN info such as HBASE::CACHEFLUSH entries
if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
// if region names don't match, skip replaying the compaction marker
if (!checkRowWithinBoundary) {
// this is a special edit; handle it
CompactionDescriptor compaction = WALEdit.getCompaction(cell);
if (compaction != null) {
// replay the compaction
replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
}
}
skippedEdits++;
continue;
}
// Figure which store the edit is meant for.
if (store == null || !CellUtil.matchingFamily(cell, store.getFamily().getName())) {
store = getHStore(cell);
}
if (store == null) {
// This should never happen. Perhaps schema was changed between
// crash and redeploy?
LOG.warn("No family for " + cell);
skippedEdits++;
continue;
}
if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
LOG.warn("Row of " + cell + " is not within region boundary");
skippedEdits++;
continue;
}
// Now, figure if we should skip this edit.
if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily().getName())) {
skippedEdits++;
continue;
}
CellUtil.setSequenceId(cell, currentReplaySeqId);
restoreEdit(store, cell, memstoreSize);
editsCount++;
}
if (this.rsAccounting != null) {
rsAccounting.addRegionReplayEditsSize(getRegionInfo().getRegionName(), memstoreSize);
}
flush = isFlushSize(this.addAndGetMemstoreSize(memstoreSize));
if (flush) {
internalFlushcache(null, currentEditSeqId, stores.values(), status, false);
}
if (coprocessorHost != null) {
coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
}
}
if (coprocessorHost != null) {
coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
}
} catch (EOFException eof) {
Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
msg = "EnLongAddered EOF. Most likely due to Master failure during " + "wal splitting, so we have this data in another edit. " + "Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, eof);
status.abort(msg);
} catch (IOException ioe) {
// If the IOE resulted from a bad file format, then this problem is idempotent and retrying won't help
if (ioe.getCause() instanceof ParseException) {
Path p = WALSplitter.moveAsideBadEditsFile(fs, edits);
msg = "File corruption enLongAddered! " + "Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, ioe);
status.setStatus(msg);
} else {
status.abort(StringUtils.stringifyException(ioe));
// other IO errors may be transient (bad network connection, checksum exception on one datanode, etc.); throw & retry
throw ioe;
}
}
if (reporter != null && !reported_once) {
reporter.progress();
}
msg = "Applied " + editsCount + ", skipped " + skippedEdits + ", firstSequenceIdInLog=" + firstSeqIdInLog + ", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
status.markComplete(msg);
LOG.debug(msg);
return currentEditSeqId;
} finally {
status.cleanup();
if (reader != null) {
reader.close();
}
}
}
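
The method above follows the standard MonitoredTask lifecycle: create a status via TaskMonitor, update it with setStatus while the work runs, finish with markComplete or abort, and always call cleanup in a finally block. Below is a minimal sketch of that lifecycle; replayOneFile is a hypothetical placeholder for the real replay loop, not an HBase method.

import java.io.IOException;

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class ReplayStatusSketch {

  public long replayWithStatus(String editsPath) throws IOException {
    // Register a task so it shows up in the server's task list.
    MonitoredTask status = TaskMonitor.get().createStatus("Replaying edits from " + editsPath);
    try {
      status.setStatus("Opening recovered edits");
      long applied = replayOneFile(editsPath);
      // Mark the task finished with a summary message.
      status.markComplete("Applied " + applied + " edits from " + editsPath);
      return applied;
    } catch (IOException ioe) {
      // Record the failure on the task before propagating it.
      status.abort("Replay failed: " + ioe.getMessage());
      throw ioe;
    } finally {
      // Always release the task's resources.
      status.cleanup();
    }
  }

  // Hypothetical stand-in for the real edit-replay loop.
  private long replayOneFile(String editsPath) throws IOException {
    return 0L;
  }
}

The real method also updates the status message inside the replay loop, so long replays surface running skipped/applied counters in the server's task list.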
Use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.
The class HRegion, method setRecovering.
/**
* Reset recovering state of current region
*/
public void setRecovering(boolean newState) {
boolean wasRecovering = this.recovering;
// Before we flip the recovering switch (enabling reads), write a region open event to the WAL if needed
if (wal != null && getRegionServerServices() != null && !writestate.readOnly && wasRecovering && !newState) {
// force a flush only if region replication is set up for this region. Otherwise no need.
boolean forceFlush = getTableDesc().getRegionReplication() > 1;
MonitoredTask status = TaskMonitor.get().createStatus("Recovering region " + this);
try {
// force a flush first
if (forceFlush) {
status.setStatus("Flushing region " + this + " because recovery is finished");
internalFlushcache(status);
}
status.setStatus("Writing region open event marker to WAL because recovery is finished");
try {
long seqId = openSeqNum;
// obtain a new seqId because we possibly have writes and flushes on top of openSeqNum
if (wal != null) {
seqId = getNextSequenceId(wal);
}
writeRegionOpenMarker(wal, seqId);
} catch (IOException e) {
// We cannot rethrow this exception since we are being called from the zk thread. The
// region has already opened. In this case we log the error, but continue
LOG.warn(getRegionInfo().getEncodedName() + " : was not able to write region opening " + "event to WAL, continuing", e);
}
} catch (IOException ioe) {
// Distributed log replay semantics does not necessarily require a flush, since the replayed
// data is already written again in the WAL. So failed flush should be fine.
LOG.warn(getRegionInfo().getEncodedName() + " : was not able to flush " + "event to WAL, continuing", ioe);
} finally {
status.cleanup();
}
}
this.recovering = newState;
if (wasRecovering && !recovering) {
// Call only when wal replay is over.
coprocessorHost.postLogReplay();
}
}
Use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.
The class AbstractTestWALReplay, method testReplayEditsWrittenIntoWAL.
/**
* Create an HRegion with the result of a WAL split and test we only see the
* good edits
* @throws Exception
*/
@Test
public void testReplayEditsWrittenIntoWAL() throws Exception {
final TableName tableName = TableName.valueOf("testReplayEditsWrittenIntoWAL");
final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableName);
final Path basedir = FSUtils.getTableDir(hbaseRootDir, tableName);
deleteDir(basedir);
final HTableDescriptor htd = createBasic3FamilyHTD(tableName);
HRegion region2 = HBaseTestingUtility.createRegionAndWAL(hri, hbaseRootDir, this.conf, htd);
HBaseTestingUtility.closeRegionAndWAL(region2);
final WAL wal = createWAL(this.conf, hbaseRootDir, logName);
final byte[] rowName = tableName.getName();
final byte[] regionName = hri.getEncodedNameAsBytes();
// Add 1k to each family.
final int countPerFamily = 1000;
Set<byte[]> familyNames = new HashSet<>();
NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
for (byte[] fam : htd.getFamiliesKeys()) {
scopes.put(fam, 0);
}
for (HColumnDescriptor hcd : htd.getFamilies()) {
addWALEdits(tableName, hri, rowName, hcd.getName(), countPerFamily, ee, wal, htd, mvcc, scopes);
familyNames.add(hcd.getName());
}
// Add a cache flush, shouldn't have any effect
wal.startCacheFlush(regionName, familyNames);
wal.completeCacheFlush(regionName);
// Add an edit to another family, should be skipped.
WALEdit edit = new WALEdit();
long now = ee.currentTime();
edit.add(new KeyValue(rowName, Bytes.toBytes("another family"), rowName, now, rowName));
wal.append(hri, new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes), edit, true);
// Delete the c family to verify deletes make it over.
edit = new WALEdit();
now = ee.currentTime();
edit.add(new KeyValue(rowName, Bytes.toBytes("c"), null, now, KeyValue.Type.DeleteFamily));
wal.append(hri, new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes), edit, true);
// Sync.
wal.sync();
// Make a new conf and a new fs for the splitter to run on so we can take
// over old wal.
final Configuration newConf = HBaseConfiguration.create(this.conf);
User user = HBaseTestingUtility.getDifferentUser(newConf, ".replay.wal.secondtime");
user.runAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
runWALSplit(newConf);
FileSystem newFS = FileSystem.get(newConf);
// 100k seems to make for about 4 flushes during HRegion#initialize.
newConf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 100);
// Make a new wal for new region.
WAL newWal = createWAL(newConf, hbaseRootDir, logName);
final AtomicInteger flushcount = new AtomicInteger(0);
try {
final HRegion region = new HRegion(basedir, newWal, newFS, newConf, hri, htd, null) {
@Override
protected FlushResult internalFlushcache(final WAL wal, final long myseqid, final Collection<Store> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker) throws IOException {
LOG.info("InternalFlushCache Invoked");
FlushResult fs = super.internalFlushcache(wal, myseqid, storesToFlush, Mockito.mock(MonitoredTask.class), writeFlushWalMarker);
flushcount.incrementAndGet();
return fs;
}
};
// The seq id this region has opened up with
long seqid = region.initialize();
// The mvcc read point from inserting data.
long writePoint = mvcc.getWritePoint();
// We flushed during init.
assertTrue("Flushcount=" + flushcount.get(), flushcount.get() > 0);
assertTrue((seqid - 1) == writePoint);
Get get = new Get(rowName);
Result result = region.get(get);
// Make sure we only see the good edits
assertEquals(countPerFamily * (htd.getFamilies().size() - 1), result.size());
region.close();
} finally {
newWal.close();
}
return null;
}
});
}
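
The overridden internalFlushcache above swaps in Mockito.mock(MonitoredTask.class), so the test can count flushes without the flush touching the real task monitor. Here is a stripped-down sketch of the same substitution; flushSomething is a hypothetical stand-in for a method that requires a status object.

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.mockito.Mockito;

public class MockedStatusSketch {

  // Hypothetical method that takes a status object, mirroring internalFlushcache.
  void flushSomething(MonitoredTask status) {
    status.setStatus("Flushing");
    status.markComplete("Flushed");
  }

  void flushWithMockedStatus() {
    // The mock silently accepts setStatus/markComplete/abort calls,
    // so nothing is registered with the real TaskMonitor.
    MonitoredTask status = Mockito.mock(MonitoredTask.class);
    flushSomething(status);
    // The mock can still be used to verify that completion was reported.
    Mockito.verify(status).markComplete("Flushed");
  }
}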
Use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.
The class TestMasterNoCluster, method testNotPullingDeadRegionServerFromZK.
@Test
public void testNotPullingDeadRegionServerFromZK() throws IOException, KeeperException, InterruptedException {
final Configuration conf = TESTUTIL.getConfiguration();
final ServerName newServer = ServerName.valueOf("test.sample", 1, 101);
final ServerName deadServer = ServerName.valueOf("test.sample", 1, 100);
final MockRegionServer rs0 = new MockRegionServer(conf, newServer);
CoordinatedStateManager cp = CoordinatedStateManagerFactory.getCoordinatedStateManager(TESTUTIL.getConfiguration());
HMaster master = new HMaster(conf, cp) {
@Override
MasterMetaBootstrap createMetaBootstrap(final HMaster master, final MonitoredTask status) {
return new MasterMetaBootstrap(this, status) {
@Override
protected void assignMeta(Set<ServerName> previouslyFailedMetaRSs, int replicaId) {
}
};
}
@Override
void initClusterSchemaService() throws IOException, InterruptedException {
}
@Override
void initializeZKBasedSystemTrackers() throws IOException, InterruptedException, KeeperException, CoordinatedStateException {
super.initializeZKBasedSystemTrackers();
// Record a newer server in server manager at first
getServerManager().recordNewServerWithLock(newServer, ServerLoad.EMPTY_SERVERLOAD);
List<ServerName> onlineServers = new ArrayList<>();
onlineServers.add(deadServer);
onlineServers.add(newServer);
// Mock the region server tracker to pull the dead server from zk
regionServerTracker = Mockito.spy(regionServerTracker);
Mockito.doReturn(onlineServers).when(regionServerTracker).getOnlineServers();
}
@Override
public ClusterConnection getConnection() {
// Insert a mock for the connection; use TESTUTIL.getConfiguration() rather than the master's conf, which already has a Connection associated, so mocking through it would fail.
try {
return HConnectionTestingUtility.getMockedConnectionAndDecorate(TESTUTIL.getConfiguration(), rs0, rs0, rs0.getServerName(), HRegionInfo.FIRST_META_REGIONINFO);
} catch (IOException e) {
return null;
}
}
};
master.start();
try {
// Wait till master is initialized.
while (!master.isInitialized()) Threads.sleep(10);
LOG.info("Master is initialized");
assertFalse("The dead server should not be pulled in", master.getServerManager().isServerOnline(deadServer));
} finally {
master.stopMaster();
master.join();
}
}
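
The subclass above can neutralize meta assignment because HMaster hands its MonitoredTask to an overridable factory method, createMetaBootstrap. The following sketch shows the same injection pattern with hypothetical names; StartupSketch and BootstrapStep are not HBase classes.

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class StartupSketch {

  static class BootstrapStep {
    private final MonitoredTask status;

    BootstrapStep(MonitoredTask status) {
      this.status = status;
    }

    void run() {
      // A real step would report progress on the shared status object.
      status.setStatus("Assigning meta");
    }
  }

  // Factory method; a test subclass overrides this to return a step whose
  // run() does nothing, just as the test above stubs out assignMeta.
  BootstrapStep createBootstrapStep(MonitoredTask status) {
    return new BootstrapStep(status);
  }

  void start() {
    MonitoredTask status = TaskMonitor.get().createStatus("Starting up");
    try {
      createBootstrapStep(status).run();
      status.markComplete("Startup finished");
    } finally {
      status.cleanup();
    }
  }
}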
Use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.
The class TestHRegion, method testStatusSettingToAbortIfAnyExceptionDuringRegionInitilization.
/**
* Test case to check that the state of the region initialization task is set to ABORTED
* if any exception occurs during initialization
*
* @throws Exception
*/
@Test
public void testStatusSettingToAbortIfAnyExceptionDuringRegionInitilization() throws Exception {
HRegionInfo info;
try {
FileSystem fs = Mockito.mock(FileSystem.class);
Mockito.when(fs.exists((Path) Mockito.anyObject())).thenThrow(new IOException());
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor("cf"));
info = new HRegionInfo(htd.getTableName(), HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, false);
Path path = new Path(dir + "testStatusSettingToAbortIfAnyExceptionDuringRegionInitilization");
region = HRegion.newHRegion(path, null, fs, CONF, info, htd, null);
// region initialization throws IOException and sets the task state to ABORTED
region.initialize();
fail("Region initialization should fail due to IOException");
} catch (IOException io) {
List<MonitoredTask> tasks = TaskMonitor.get().getTasks();
for (MonitoredTask monitoredTask : tasks) {
if (!(monitoredTask instanceof MonitoredRPCHandler) && monitoredTask.getDescription().contains(region.toString())) {
assertTrue("Region state should be ABORTED.", monitoredTask.getState().equals(MonitoredTask.State.ABORTED));
break;
}
}
} finally {
HBaseTestingUtility.closeRegionAndWAL(region);
}
}
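
The loop above scans TaskMonitor.get().getTasks() for the task whose description mentions the region and asserts its state. The same lookup can be factored into a small helper, sketched below; it omits the MonitoredRPCHandler filter from the test, so keep that check if RPC handler tasks might also match the description.

import java.util.List;

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class TaskStateSketch {

  // Returns the state of the first registered task whose description contains
  // the given fragment, or null if no matching task is found.
  static MonitoredTask.State findTaskState(String descriptionFragment) {
    List<MonitoredTask> tasks = TaskMonitor.get().getTasks();
    for (MonitoredTask task : tasks) {
      if (task.getDescription().contains(descriptionFragment)) {
        return task.getState();
      }
    }
    return null;
  }
}

With such a helper, the assertion in the test reduces to comparing findTaskState(region.toString()) with MonitoredTask.State.ABORTED.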