Use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
The class WALProcedurePrettyPrinter, method doWork.
@Override
protected int doWork() throws Exception {
Path path = new Path(file);
FileSystem fs = path.getFileSystem(conf);
try (WAL.Reader reader = WALFactory.createReader(fs, path, conf)) {
for (; ; ) {
WAL.Entry entry = reader.next();
if (entry == null) {
return 0;
}
WALKey key = entry.getKey();
WALEdit edit = entry.getEdit();
long sequenceId = key.getSequenceId();
long writeTime = key.getWriteTime();
out.println(String.format(KEY_TMPL, sequenceId, FORMATTER.format(Instant.ofEpochMilli(writeTime))));
for (Cell cell : edit.getCells()) {
Map<String, Object> op = WALPrettyPrinter.toStringMap(cell);
if (!Bytes.equals(PROC_FAMILY, 0, PROC_FAMILY.length, cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength())) {
// We could have cells other than procedure edits, for example, a flush marker
WALPrettyPrinter.printCell(out, op, false, false);
continue;
}
long procId = Bytes.toLong(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
out.println("pid=" + procId + ", type=" + op.get("type") + ", column=" + op.get("family") + ":" + op.get("qualifier"));
if (cell.getType() == Cell.Type.Put) {
if (cell.getValueLength() > 0) {
// should be a normal put
Procedure<?> proc = ProcedureUtil.convertToProcedure(ProcedureProtos.Procedure.parser().parseFrom(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
out.println("\t" + proc.toStringDetails());
} else {
// should be a 'delete' put
out.println("\tmark deleted");
}
}
out.println("cell total size sum: " + cell.heapSize());
}
out.println("edit heap size: " + edit.heapSize());
out.println("position: " + reader.getPosition());
}
}
}
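As a companion to doWork() above, here is a minimal, self-contained sketch of the same reader loop in isolation, reduced to dumping each entry's key and cell count. The class name and argument handling are illustrative only, not part of the tool:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;

// Hypothetical standalone dumper mirroring the doWork() loop: read entries
// until reader.next() returns null, which signals end-of-file.
public final class WalDumpSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // args[0] is the path to a WAL (or procedure WAL) file
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);
    try (WAL.Reader reader = WALFactory.createReader(fs, path, conf)) {
      for (WAL.Entry entry; (entry = reader.next()) != null; ) {
        System.out.println(entry.getKey() + " cells=" + entry.getEdit().size());
      }
    }
  }
}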
Use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
The class RSRpcServices, method replay.
/**
* Replay the given changes, as when distributedLogReplay replays WAL edits from a failed RS. The
* guarantee is that the given mutations will be durable on the receiving RS if this method
* returns without any exception.
* @param controller the RPC controller
* @param request the request
* @deprecated Since 3.0.0, will be removed in 4.0.0. Not used any more, put here only for
* compatibility with old region replica implementation. Now we will use
* {@code replicateToReplica} method instead.
*/
@Deprecated
@Override
@QosPriority(priority = HConstants.REPLAY_QOS)
public ReplicateWALEntryResponse replay(final RpcController controller, final ReplicateWALEntryRequest request) throws ServiceException {
long before = EnvironmentEdgeManager.currentTime();
CellScanner cells = getAndReset(controller);
try {
checkOpen();
List<WALEntry> entries = request.getEntryList();
if (entries == null || entries.isEmpty()) {
// empty input
return ReplicateWALEntryResponse.newBuilder().build();
}
ByteString regionName = entries.get(0).getKey().getEncodedRegionName();
HRegion region = server.getRegionByEncodedName(regionName.toStringUtf8());
// Do not invoke coprocessors if this is a secondary region replica.
RegionCoprocessorHost coprocessorHost = ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo()) ? region.getCoprocessorHost() : null;
List<Pair<WALKey, WALEdit>> walEntries = new ArrayList<>();
// Skip adding the edits to WAL if this is a secondary region replica
boolean isPrimary = RegionReplicaUtil.isDefaultReplica(region.getRegionInfo());
Durability durability = isPrimary ? Durability.USE_DEFAULT : Durability.SKIP_WAL;
for (WALEntry entry : entries) {
if (!regionName.equals(entry.getKey().getEncodedRegionName())) {
throw new NotServingRegionException("Replay request contains entries from multiple " + "regions. First region:" + regionName.toStringUtf8() + " , other region:" + entry.getKey().getEncodedRegionName());
}
if (server.nonceManager != null && isPrimary) {
long nonceGroup = entry.getKey().hasNonceGroup() ? entry.getKey().getNonceGroup() : HConstants.NO_NONCE;
long nonce = entry.getKey().hasNonce() ? entry.getKey().getNonce() : HConstants.NO_NONCE;
server.nonceManager.reportOperationFromWal(nonceGroup, nonce, entry.getKey().getWriteTime());
}
Pair<WALKey, WALEdit> walEntry = (coprocessorHost == null) ? null : new Pair<>();
List<MutationReplay> edits = WALSplitUtil.getMutationsFromWALEntry(entry, cells, walEntry, durability);
if (coprocessorHost != null) {
// Start coprocessor replay here. The coprocessor is for each WALEdit instead of a KeyValue.
if (coprocessorHost.preWALRestore(region.getRegionInfo(), walEntry.getFirst(), walEntry.getSecond())) {
// if bypass this log entry, ignore it ...
continue;
}
walEntries.add(walEntry);
}
if (edits != null && !edits.isEmpty()) {
// HBASE-17924
// sort to improve lock efficiency
Collections.sort(edits, (v1, v2) -> Row.COMPARATOR.compare(v1.mutation, v2.mutation));
long replaySeqId = (entry.getKey().hasOrigSequenceNumber()) ? entry.getKey().getOrigSequenceNumber() : entry.getKey().getLogSequenceNumber();
OperationStatus[] result = doReplayBatchOp(region, edits, replaySeqId);
// check if it's a partial success
for (int i = 0; result != null && i < result.length; i++) {
if (result[i] != OperationStatus.SUCCESS) {
throw new IOException(result[i].getExceptionMsg());
}
}
}
}
// sync wal at the end because ASYNC_WAL is used above
WAL wal = region.getWAL();
if (wal != null) {
wal.sync();
}
if (coprocessorHost != null) {
for (Pair<WALKey, WALEdit> entry : walEntries) {
coprocessorHost.postWALRestore(region.getRegionInfo(), entry.getFirst(), entry.getSecond());
}
}
return ReplicateWALEntryResponse.newBuilder().build();
} catch (IOException ie) {
throw new ServiceException(ie);
} finally {
final MetricsRegionServer metricsRegionServer = server.getMetrics();
if (metricsRegionServer != null) {
metricsRegionServer.updateReplay(EnvironmentEdgeManager.currentTime() - before);
}
}
}
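A note on the Collections.sort call above (HBASE-17924): ordering mutations by row means row locks are acquired in a consistent order during the batch replay, which is the lock-efficiency improvement the comment refers to. A toy illustration of the same comparator, with hypothetical row keys:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Row;
import org.apache.hadoop.hbase.util.Bytes;

// Put implements Row, so Row.COMPARATOR orders mutations by row key.
List<Put> puts = new ArrayList<>(Arrays.asList(
  new Put(Bytes.toBytes("row-b")),
  new Put(Bytes.toBytes("row-a"))));
puts.sort((a, b) -> Row.COMPARATOR.compare(a, b));
// puts now begins with "row-a"; doReplayBatchOp then locks rows in sorted order.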
Use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
The class TestWALRecordReader, method testPartialRead.
/**
* Test partial reads from the WALs based on passed time range.
*/
@Test
public void testPartialRead() throws Exception {
final WALFactory walfactory = new WALFactory(conf, getName());
WAL log = walfactory.getWAL(info);
// This test depends on timestamp being millisecond based and the filename of the WAL also
// being millisecond based.
long ts = EnvironmentEdgeManager.currentTime();
WALEdit edit = new WALEdit();
edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), ts, value));
log.appendData(info, getWalKeyImpl(ts, scopes), edit);
edit = new WALEdit();
edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts + 1, value));
log.appendData(info, getWalKeyImpl(ts + 1, scopes), edit);
log.sync();
Threads.sleep(10);
LOG.info("Before 1st WAL roll " + log.toString());
log.rollWriter();
LOG.info("Past 1st WAL roll " + log.toString());
Thread.sleep(1);
long ts1 = EnvironmentEdgeManager.currentTime();
edit = new WALEdit();
edit.add(new KeyValue(rowName, family, Bytes.toBytes("3"), ts1 + 1, value));
log.appendData(info, getWalKeyImpl(ts1 + 1, scopes), edit);
edit = new WALEdit();
edit.add(new KeyValue(rowName, family, Bytes.toBytes("4"), ts1 + 2, value));
log.appendData(info, getWalKeyImpl(ts1 + 2, scopes), edit);
log.sync();
log.shutdown();
walfactory.shutdown();
LOG.info("Closed WAL " + log.toString());
WALInputFormat input = new WALInputFormat();
Configuration jobConf = new Configuration(conf);
jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);
// Only 1st file is considered, and only its 1st entry is in-range.
List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
assertEquals(1, splits.size());
testSplit(splits.get(0), Bytes.toBytes("1"));
jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1 + 1);
splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
assertEquals(2, splits.size());
// Both entries from first file are in-range.
testSplit(splits.get(0), Bytes.toBytes("1"), Bytes.toBytes("2"));
// Only the 1st entry from the 2nd file is in-range.
testSplit(splits.get(1), Bytes.toBytes("3"));
jobConf.setLong(WALInputFormat.START_TIME_KEY, ts + 1);
jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1 + 1);
splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
assertEquals(1, splits.size());
// Only the 1st entry from the 2nd file is in-range.
testSplit(splits.get(0), Bytes.toBytes("3"));
}
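The two time-range keys exercised here are the same ones a real WAL-reading MapReduce job would set (WALPlayer wires them from its command-line options). A minimal sketch of that job setup, with a placeholder input directory:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.mapreduce.Job;

// Builds a job whose map input is WALKey/WALEdit pairs restricted to
// [startMs, endMs]; "/hbase/oldWALs" is a placeholder input directory.
static Job buildWalScanJob(long startMs, long endMs) throws IOException {
  Configuration jobConf = HBaseConfiguration.create();
  jobConf.set("mapreduce.input.fileinputformat.inputdir", "/hbase/oldWALs");
  jobConf.setLong(WALInputFormat.START_TIME_KEY, startMs);
  jobConf.setLong(WALInputFormat.END_TIME_KEY, endMs);
  Job job = Job.getInstance(jobConf, "wal-scan");
  job.setInputFormatClass(WALInputFormat.class);
  return job;
}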
Use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
The class TestWALRecordReader, method testWALRecordReader.
/**
* Test basic functionality
*/
@Test
public void testWALRecordReader() throws Exception {
final WALFactory walfactory = new WALFactory(conf, getName());
WAL log = walfactory.getWAL(info);
byte[] value = Bytes.toBytes("value");
WALEdit edit = new WALEdit();
edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), EnvironmentEdgeManager.currentTime(), value));
long txid = log.appendData(info, getWalKeyImpl(EnvironmentEdgeManager.currentTime(), scopes), edit);
log.sync(txid);
// make sure 2nd log gets a later timestamp
Thread.sleep(1);
long secondTs = EnvironmentEdgeManager.currentTime();
log.rollWriter();
edit = new WALEdit();
edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), EnvironmentEdgeManager.currentTime(), value));
txid = log.appendData(info, getWalKeyImpl(EnvironmentEdgeManager.currentTime(), scopes), edit);
log.sync(txid);
log.shutdown();
walfactory.shutdown();
long thirdTs = EnvironmentEdgeManager.currentTime();
// should have 2 log files now
WALInputFormat input = new WALInputFormat();
Configuration jobConf = new Configuration(conf);
jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
// make sure both logs are found
List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
assertEquals(2, splits.size());
// should return exactly one KV
testSplit(splits.get(0), Bytes.toBytes("1"));
// same for the 2nd split
testSplit(splits.get(1), Bytes.toBytes("2"));
// now test basic time ranges:
// set an endtime, the 2nd log file can be ignored completely.
jobConf.setLong(WALInputFormat.END_TIME_KEY, secondTs - 1);
splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
assertEquals(1, splits.size());
testSplit(splits.get(0), Bytes.toBytes("1"));
// now set a start time
jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
assertTrue(splits.isEmpty());
}
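testSplit is a helper defined elsewhere in this test class; presumably it opens a record reader on the split and compares each cell's qualifier against the expected values. A hedged sketch of that shape, with the TaskAttemptContext left to the test harness:
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// WALInputFormat is an InputFormat<WALKey, WALEdit>, so the standard
// mapreduce record-reader protocol applies.
void readSplit(WALInputFormat input, InputSplit split, TaskAttemptContext context) throws Exception {
  RecordReader<WALKey, WALEdit> reader = input.createRecordReader(split, context);
  reader.initialize(split, context);
  while (reader.nextKeyValue()) {
    WALKey key = reader.getCurrentKey();
    WALEdit edit = reader.getCurrentValue();
    // assert here, e.g. on the qualifiers of edit.getCells()
  }
  reader.close();
}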
Use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
The class HRegion, method replayRecoveredEdits.
/**
* @param edits File of recovered edits.
* @param maxSeqIdInStores Maximum sequenceid found in each store. Edits in wal must be larger
* than this to be replayed for each store.
* @return the sequence id of the last edit added to this region out of the recovered edits log,
* or <code>minSeqId</code> if nothing was added from the edit logs.
*/
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter, FileSystem fs) throws IOException {
String msg = "Replaying edits from " + edits;
LOG.info(msg);
MonitoredTask status = TaskMonitor.get().createStatus(msg);
status.setStatus("Opening recovered edits");
WAL.Reader reader = null;
try {
reader = WALFactory.createReader(fs, edits, conf);
long currentEditSeqId = -1;
long currentReplaySeqId = -1;
long firstSeqIdInLog = -1;
long skippedEdits = 0;
long editsCount = 0;
long intervalEdits = 0;
WAL.Entry entry;
HStore store = null;
boolean reported_once = false;
ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
try {
// How many edits seen before we check elapsed time
int interval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
// How often to send a progress report (default 1/2 master timeout)
int period = this.conf.getInt("hbase.hstore.report.period", 300000);
long lastReport = EnvironmentEdgeManager.currentTime();
if (coprocessorHost != null) {
coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
}
while ((entry = reader.next()) != null) {
WALKey key = entry.getKey();
WALEdit val = entry.getEdit();
// ng may be null in some tests, or when nonces are disabled
if (ng != null) {
ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
}
if (reporter != null) {
intervalEdits += val.size();
if (intervalEdits >= interval) {
// Number of edits interval reached
intervalEdits = 0;
long cur = EnvironmentEdgeManager.currentTime();
if (lastReport + period <= cur) {
status.setStatus("Replaying edits..." + " skipped=" + skippedEdits + " edits=" + editsCount);
// Timeout reached
if (!reporter.progress()) {
msg = "Progressable reporter failed, stopping replay for region " + this;
LOG.warn(msg);
status.abort(msg);
throw new IOException(msg);
}
reported_once = true;
lastReport = cur;
}
}
}
if (firstSeqIdInLog == -1) {
firstSeqIdInLog = key.getSequenceId();
}
if (currentEditSeqId > key.getSequenceId()) {
// when this condition is true, it means we have a serious defect because we need to
// maintain increasing SeqId for WAL edits per region
LOG.error(getRegionInfo().getEncodedName() + " : " + "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key + "; edit=" + val);
} else {
currentEditSeqId = key.getSequenceId();
}
currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ? key.getOrigLogSeqNum() : currentEditSeqId;
// Start coprocessor replay here. The coprocessor is for each WALEdit instead of a KeyValue.
if (coprocessorHost != null) {
status.setStatus("Running pre-WAL-restore hook in coprocessors");
if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
// if bypass this wal entry, ignore it ...
continue;
}
}
boolean checkRowWithinBoundary = false;
// Check this edit is for this region.
if (!Bytes.equals(key.getEncodedRegionName(), this.getRegionInfo().getEncodedNameAsBytes())) {
checkRowWithinBoundary = true;
}
boolean flush = false;
MemStoreSizing memStoreSizing = new NonThreadSafeMemStoreSizing();
for (Cell cell : val.getCells()) {
// Check this edit is for me. Also, guard against writing the METACOLUMN info such as HBASE::CACHEFLUSH entries.
if (WALEdit.isMetaEditFamily(cell)) {
// if region names don't match, skip replaying the compaction marker
if (!checkRowWithinBoundary) {
// this is a special edit, we should handle it
CompactionDescriptor compaction = WALEdit.getCompaction(cell);
if (compaction != null) {
// replay the compaction
replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
}
}
skippedEdits++;
continue;
}
// Figure which store the edit is meant for.
if (store == null || !CellUtil.matchingFamily(cell, store.getColumnFamilyDescriptor().getName())) {
store = getStore(cell);
}
if (store == null) {
// This should never happen. Perhaps schema was changed between
// crash and redeploy?
LOG.warn("No family for cell {} in region {}", cell, this);
skippedEdits++;
continue;
}
if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
LOG.warn("Row of {} is not within region boundary for region {}", cell, this);
skippedEdits++;
continue;
}
// Now, figure if we should skip this edit.
if (key.getSequenceId() <= maxSeqIdInStores.get(store.getColumnFamilyDescriptor().getName())) {
skippedEdits++;
continue;
}
PrivateCellUtil.setSequenceId(cell, currentReplaySeqId);
restoreEdit(store, cell, memStoreSizing);
editsCount++;
}
MemStoreSize mss = memStoreSizing.getMemStoreSize();
incMemStoreSize(mss);
flush = isFlushSize(this.memStoreSizing.getMemStoreSize());
if (flush) {
internalFlushcache(null, currentEditSeqId, stores.values(), status, false, FlushLifeCycleTracker.DUMMY);
}
if (coprocessorHost != null) {
coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
}
}
if (coprocessorHost != null) {
coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
}
} catch (EOFException eof) {
Path p = WALSplitUtil.moveAsideBadEditsFile(walFS, edits);
msg = "EnLongAddered EOF. Most likely due to Master failure during " + "wal splitting, so we have this data in another edit. Continuing, but renaming " + edits + " as " + p + " for region " + this;
LOG.warn(msg, eof);
status.abort(msg);
} catch (IOException ioe) {
// If the IOE resulted from bad file format, then this problem is idempotent and retrying won't help
if (ioe.getCause() instanceof ParseException) {
Path p = WALSplitUtil.moveAsideBadEditsFile(walFS, edits);
msg = "File corruption enLongAddered! " + "Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, ioe);
status.setStatus(msg);
} else {
status.abort(StringUtils.stringifyException(ioe));
// other IO errors may be transient (bad network connection, checksum exception on one datanode, etc). throw & retry
throw ioe;
}
}
if (reporter != null && !reported_once) {
reporter.progress();
}
msg = "Applied " + editsCount + ", skipped " + skippedEdits + ", firstSequenceIdInLog=" + firstSeqIdInLog + ", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
status.markComplete(msg);
LOG.debug(msg);
return currentEditSeqId;
} finally {
status.cleanup();
if (reader != null) {
reader.close();
}
}
}
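One detail of the loop above worth isolating: the per-store skip rule. An edit is replayed into the memstore only if its sequence id is newer than what the store's files already persist. A minimal sketch, assuming (as in HRegion) that maxSeqIdInStores is keyed with Bytes.BYTES_COMPARATOR:
import java.util.Map;
import org.apache.hadoop.hbase.wal.WALKey;

// Returns true if the edit still needs to be applied; false means the store
// files already contain data at or beyond this sequence id.
static boolean shouldReplay(WALKey key, byte[] family, Map<byte[], Long> maxSeqIdInStores) {
  Long maxSeqId = maxSeqIdInStores.get(family);
  return maxSeqId == null || key.getSequenceId() > maxSeqId;
}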