Use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.
The class Compactor, method getFileDetails:
/**
 * Extracts some details about the files to compact that are commonly needed by compactors.
 * @param filesToCompact Files.
 * @param allFiles Whether all files are included for compaction
 * @return The result.
 */
protected FileDetails getFileDetails(Collection<StoreFile> filesToCompact, boolean allFiles) throws IOException {
  FileDetails fd = new FileDetails();
  long oldestHFileTimeStampToKeepMVCC =
      System.currentTimeMillis() - (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod);
  for (StoreFile file : filesToCompact) {
    if (allFiles && (file.getModificationTimeStamp() < oldestHFileTimeStampToKeepMVCC)) {
      // when all files are compacted we can calculate the smallest MVCC value to keep
      if (fd.minSeqIdToKeep < file.getMaxMemstoreTS()) {
        fd.minSeqIdToKeep = file.getMaxMemstoreTS();
      }
    }
    long seqNum = file.getMaxSequenceId();
    fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
    StoreFileReader r = file.getReader();
    if (r == null) {
      LOG.warn("Null reader for " + file.getPath());
      continue;
    }
    // NOTE: use getEntries when compacting instead of getFilterEntries, otherwise under-sized
    // blooms can cause progress to be miscalculated or if the user switches bloom
    // type (e.g. from ROW to ROWCOL)
    long keyCount = r.getEntries();
    fd.maxKeyCount += keyCount;
    // calculate the latest MVCC readpoint in any of the involved store files
    Map<byte[], byte[]> fileInfo = r.loadFileInfo();
    byte[] tmp = null;
    // SeqId number.
    if (r.isBulkLoaded()) {
      fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, r.getSequenceID());
    } else {
      tmp = fileInfo.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
      if (tmp != null) {
        fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
      }
    }
    tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
    if (tmp != null) {
      fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
    }
    // If required, calculate the earliest put timestamp of all involved storefiles.
    // This is used to remove family delete marker during compaction.
    long earliestPutTs = 0;
    if (allFiles) {
      tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
      if (tmp == null) {
        // There's a file with no information, must be an old one
        // assume we have very old puts
        fd.earliestPutTs = earliestPutTs = HConstants.OLDEST_TIMESTAMP;
      } else {
        earliestPutTs = Bytes.toLong(tmp);
        fd.earliestPutTs = Math.min(fd.earliestPutTs, earliestPutTs);
      }
    }
    tmp = fileInfo.get(StoreFile.TIMERANGE_KEY);
    TimeRangeTracker trt = TimeRangeTracker.getTimeRangeTracker(tmp);
    fd.latestPutTs = trt == null ? HConstants.LATEST_TIMESTAMP : trt.getMax();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Compacting " + file + ", keycount=" + keyCount
          + ", bloomtype=" + r.getBloomFilterType().toString()
          + ", size=" + TraditionalBinaryPrefix.long2String(r.length(), "", 1)
          + ", encoding=" + r.getHFileReader().getDataBlockEncoding()
          + ", seqNum=" + seqNum
          + (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
    }
  }
  return fd;
}
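The TIMERANGE handling at the end of the loop can be read in isolation: look up the serialized tracker in the file info, rebuild it, and fall back to the widest possible bound when the metadata is absent. The sketch below is not HBase code; the helper name latestPutTs is made up, and it reuses only classes and calls already shown in the snippet (java.util.Map, StoreFile, TimeRangeTracker, HConstants, Bytes are assumed to be on the classpath as above).

// Minimal sketch, assuming the same imports as the Compactor snippet above;
// latestPutTs is a hypothetical helper, not an HBase method.
static long latestPutTs(Map<byte[], byte[]> fileInfo) throws IOException {
  byte[] tmp = fileInfo.get(StoreFile.TIMERANGE_KEY);
  // Older files may have been written before TIMERANGE metadata existed,
  // in which case no tracker can be rebuilt and null is returned.
  TimeRangeTracker trt = TimeRangeTracker.getTimeRangeTracker(tmp);
  // Fall back to LATEST_TIMESTAMP so a file without metadata is treated as covering everything.
  return trt == null ? HConstants.LATEST_TIMESTAMP : trt.getMax();
}

Falling back to HConstants.LATEST_TIMESTAMP is the conservative choice: a store file that lacks the metadata is assumed to contain recent puts rather than being silently aged out.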
Use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.
The class HFilePrettyPrinter, method printMeta:
private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException {
out.println("Block index size as per heapsize: " + reader.indexSize());
out.println(asSeparateLines(reader.toString()));
out.println("Trailer:\n " + asSeparateLines(reader.getTrailer().toString()));
out.println("Fileinfo:");
for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
if (Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY) || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT) || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS) || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY) || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS) || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY)) {
out.println(Bytes.toLong(e.getValue()));
} else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) {
TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue());
out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
} else if (Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN) || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN) || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION) || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN)) {
out.println(Bytes.toInt(e.getValue()));
} else if (Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY)) {
out.println(Bytes.toBoolean(e.getValue()));
} else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) {
out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString());
} else {
out.println(Bytes.toStringBinary(e.getValue()));
}
}
try {
out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString));
} catch (Exception e) {
out.println("Unable to retrieve the midkey");
}
// Printing general bloom information
DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
BloomFilter bloomFilter = null;
if (bloomMeta != null)
bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
out.println("Bloom filter:");
if (bloomFilter != null) {
out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
} else {
out.println(FOUR_SPACES + "Not present");
}
// Printing delete bloom information
bloomMeta = reader.getDeleteBloomFilterMetadata();
bloomFilter = null;
if (bloomMeta != null)
bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
out.println("Delete Family Bloom filter:");
if (bloomFilter != null) {
out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
} else {
out.println(FOUR_SPACES + "Not present");
}
}
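Unlike the numeric and boolean fileinfo entries, the TIMERANGE entry stores a whole serialized TimeRangeTracker, so printMeta parses it back before printing. That decoding step can be sketched on its own; the helper name describeTimeRange below is hypothetical, and only the calls already shown in printMeta (TimeRangeTracker.parseFrom, getMin, getMax) are used.

// Minimal sketch; describeTimeRange is a made-up helper, not part of HFilePrettyPrinter.
static String describeTimeRange(byte[] timeRangeBytes) throws IOException {
  // parseFrom rebuilds the tracker that was serialized into the TIMERANGE fileinfo entry
  TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(timeRangeBytes);
  // printMeta renders the range as "min....max"
  return timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax();
}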
Use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.
The class TestHFileOutputFormat2, method test_TIMERANGE:
/*
 * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
 * metadata used by time-restricted scans.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void test_TIMERANGE() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("test_TIMERANGE_present");
  LOG.info("Timerange dir writing to dir: " + dir);
  try {
    // build a record writer using HFileOutputFormat2
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    // Pass two key values with explicit timestamps
    final byte[] b = Bytes.toBytes("b");
    // value 1 with timestamp 2000
    KeyValue kv = new KeyValue(b, b, b, 2000, b);
    KeyValue original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertEquals(original, kv);
    // value 2 with timestamp 1000
    kv = new KeyValue(b, b, b, 1000, b);
    original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertEquals(original, kv);
    // verify that the file has the proper FileInfo.
    writer.close(context);
    // the generated file lives 1 directory down from the attempt directory
    // and is the only file, e.g.
    // _attempt__0000_r_000000_0/b/1979617994050536795
    FileSystem fs = FileSystem.get(conf);
    Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
    FileStatus[] sub1 = fs.listStatus(attemptDirectory);
    FileStatus[] file = fs.listStatus(sub1[0].getPath());
    // open as HFile Reader and pull out TIMERANGE FileInfo.
    HFile.Reader rd =
        HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
    Map<byte[], byte[]> finfo = rd.getHFileInfo();
    byte[] range = finfo.get(Bytes.toBytes("TIMERANGE"));
    assertNotNull(range);
    // unmarshall and check values.
    TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(range);
    LOG.info(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
    assertEquals(1000, timeRangeTracker.getMin());
    assertEquals(2000, timeRangeTracker.getMax());
    rd.close();
  } finally {
    if (writer != null && context != null) {
      writer.close(context);
    }
    dir.getFileSystem(conf).delete(dir, true);
  }
}
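On the write side, the TIMERANGE entry this test reads back comes from a tracker that widens its range as cells are written, which is why the assertions see min=1000 and max=2000 even though the cell with timestamp 2000 was written first. The sketch below illustrates that behaviour under an assumed HBase 2.x classpath; TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC) and includeTimestamp are the expected API there, but treat the exact factory and enum names as assumptions and verify them against your HBase version.

// Minimal sketch of how min/max end up at 1000 and 2000 in the assertions above
// (assumes HBase 2.x; verify TimeRangeTracker.create and Type.NON_SYNC against your version).
TimeRangeTracker tracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
tracker.includeTimestamp(2000); // first cell written by the test
tracker.includeTimestamp(1000); // second cell widens the lower bound
assert tracker.getMin() == 1000;
assert tracker.getMax() == 2000;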