Search in sources :

Example 1 with TimeRangeTracker

use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.

the class Compactor method getFileDetails.

/**
 * Extracts some details about the files to compact that are commonly needed by compactors.
 * @param filesToCompact Files.
 * @param allFiles Whether all files are included for compaction
 * @return The result.
 */
protected FileDetails getFileDetails(Collection<StoreFile> filesToCompact, boolean allFiles) throws IOException {
    FileDetails fd = new FileDetails();
    long oldestHFileTimeStampToKeepMVCC = System.currentTimeMillis() - (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod);
    for (StoreFile file : filesToCompact) {
        if (allFiles && (file.getModificationTimeStamp() < oldestHFileTimeStampToKeepMVCC)) {
            // MVCC value to keep
            if (fd.minSeqIdToKeep < file.getMaxMemstoreTS()) {
                fd.minSeqIdToKeep = file.getMaxMemstoreTS();
        long seqNum = file.getMaxSequenceId();
        fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
        StoreFileReader r = file.getReader();
        if (r == null) {
            LOG.warn("Null reader for " + file.getPath());
        // NOTE: use getEntries when compacting instead of getFilterEntries, otherwise under-sized
        // blooms can cause progress to be miscalculated or if the user switches bloom
        // type (e.g. from ROW to ROWCOL)
        long keyCount = r.getEntries();
        fd.maxKeyCount += keyCount;
        // calculate the latest MVCC readpoint in any of the involved store files
        Map<byte[], byte[]> fileInfo = r.loadFileInfo();
        byte[] tmp = null;
        // SeqId number.
        if (r.isBulkLoaded()) {
            fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, r.getSequenceID());
        } else {
            tmp = fileInfo.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
            if (tmp != null) {
                fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
        tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
        if (tmp != null) {
            fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
        // If required, calculate the earliest put timestamp of all involved storefiles.
        // This is used to remove family delete marker during compaction.
        long earliestPutTs = 0;
        if (allFiles) {
            tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
            if (tmp == null) {
                // There's a file with no information, must be an old one
                // assume we have very old puts
                fd.earliestPutTs = earliestPutTs = HConstants.OLDEST_TIMESTAMP;
            } else {
                earliestPutTs = Bytes.toLong(tmp);
                fd.earliestPutTs = Math.min(fd.earliestPutTs, earliestPutTs);
        tmp = fileInfo.get(StoreFile.TIMERANGE_KEY);
        TimeRangeTracker trt = TimeRangeTracker.getTimeRangeTracker(tmp);
        fd.latestPutTs = trt == null ? HConstants.LATEST_TIMESTAMP : trt.getMax();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Compacting " + file + ", keycount=" + keyCount + ", bloomtype=" + r.getBloomFilterType().toString() + ", size=" + TraditionalBinaryPrefix.long2String(r.length(), "", 1) + ", encoding=" + r.getHFileReader().getDataBlockEncoding() + ", seqNum=" + seqNum + (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
    return fd;
Also used : StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) TimeRangeTracker(org.apache.hadoop.hbase.regionserver.TimeRangeTracker) StoreFileReader(org.apache.hadoop.hbase.regionserver.StoreFileReader)

Example 2 with TimeRangeTracker

use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.

the class HFilePrettyPrinter method printMeta.

private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException {
    out.println("Block index size as per heapsize: " + reader.indexSize());
    out.println("Trailer:\n    " + asSeparateLines(reader.getTrailer().toString()));
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
        out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
        if (Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY) || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT) || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS) || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY) || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS) || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY)) {
        } else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) {
            TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue());
            out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
        } else if (Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN) || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN) || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION) || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN)) {
        } else if (Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY)) {
        } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) {
            out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString());
        } else {
    try {
        out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString));
    } catch (Exception e) {
        out.println("Unable to retrieve the midkey");
    // Printing general bloom information
    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
    BloomFilter bloomFilter = null;
    if (bloomMeta != null)
        bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
    out.println("Bloom filter:");
    if (bloomFilter != null) {
        out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
        out.println(FOUR_SPACES + "Not present");
    // Printing delete bloom information
    bloomMeta = reader.getDeleteBloomFilterMetadata();
    bloomFilter = null;
    if (bloomMeta != null)
        bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
    out.println("Delete Family Bloom filter:");
    if (bloomFilter != null) {
        out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
        out.println(FOUR_SPACES + "Not present");
Also used : DataInput(java.io.DataInput) TimeRangeTracker(org.apache.hadoop.hbase.regionserver.TimeRangeTracker) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) PrivateCellUtil(org.apache.hadoop.hbase.PrivateCellUtil) CellUtil(org.apache.hadoop.hbase.CellUtil) ParseException(org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException) IOException(java.io.IOException) BloomFilter(org.apache.hadoop.hbase.util.BloomFilter)

Example 3 with TimeRangeTracker

use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.

the class TestHFileOutputFormat2 method test_TIMERANGE.

/**
 * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
 * metadata used by time-restricted scans.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");"Timerange dir writing to dir: " + dir);
    try {
        // build a record writer using HFileOutputFormat2
        Job job = new Job(conf);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        // Pass two key values with explicit times stamps
        final byte[] b = Bytes.toBytes("b");
        // value 1 with timestamp 2000
        KeyValue kv = new KeyValue(b, b, b, 2000, b);
        KeyValue original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertEquals(original, kv);
        // value 2 with timestamp 1000
        kv = new KeyValue(b, b, b, 1000, b);
        original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertEquals(original, kv);
        // verify that the file has the proper FileInfo.
        // the generated file lives 1 directory down from the attempt directory
        // and is the only file, e.g.
        // _attempt__0000_r_000000_0/b/1979617994050536795
        FileSystem fs = FileSystem.get(conf);
        Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
        FileStatus[] sub1 = fs.listStatus(attemptDirectory);
        FileStatus[] file = fs.listStatus(sub1[0].getPath());
        // open as HFile Reader and pull out TIMERANGE FileInfo.
        HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
        Map<byte[], byte[]> finfo = rd.getHFileInfo();
        byte[] range = finfo.get(Bytes.toBytes("TIMERANGE"));
        // unmarshall and check values.
        TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(range); + "...." + timeRangeTracker.getMax());
        assertEquals(1000, timeRangeTracker.getMin());
        assertEquals(2000, timeRangeTracker.getMax());
    } finally {
        if (writer != null && context != null)
        dir.getFileSystem(conf).delete(dir, true);
Also used : Path(org.apache.hadoop.fs.Path) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) KeyValue(org.apache.hadoop.hbase.KeyValue) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) FileSystem(org.apache.hadoop.fs.FileSystem) TestHRegionFileSystem(org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) TimeRangeTracker(org.apache.hadoop.hbase.regionserver.TimeRangeTracker) Job(org.apache.hadoop.mapreduce.Job) HFile(org.apache.hadoop.hbase.io.hfile.HFile) Cell(org.apache.hadoop.hbase.Cell) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Ignore(org.junit.Ignore) Test(org.junit.Test)


TimeRangeTracker (org.apache.hadoop.hbase.regionserver.TimeRangeTracker)3 DataInput (java.io.DataInput)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 SortedMap (java.util.SortedMap)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)1 Path (org.apache.hadoop.fs.Path)1 Cell (org.apache.hadoop.hbase.Cell)1 CellUtil (org.apache.hadoop.hbase.CellUtil)1 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)1 KeyValue (org.apache.hadoop.hbase.KeyValue)1 PrivateCellUtil (org.apache.hadoop.hbase.PrivateCellUtil)1 ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable)1 CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig)1 HFile (org.apache.hadoop.hbase.io.hfile.HFile)1 Reader (org.apache.hadoop.hbase.io.hfile.HFile.Reader)1