Search in sources:

Example 1 with TimeRangeTracker

Use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.

From the class Compactor, method getFileDetails:

/**
   * Extracts some details about the files to compact that are commonly needed by compactors.
   * @param filesToCompact Files.
   * @param allFiles Whether all files are included for compaction
   * @return The result.
   */
protected FileDetails getFileDetails(Collection<StoreFile> filesToCompact, boolean allFiles) throws IOException {
    FileDetails fd = new FileDetails();
    long oldestHFileTimeStampToKeepMVCC = System.currentTimeMillis() - (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod);
    for (StoreFile file : filesToCompact) {
        if (allFiles && (file.getModificationTimeStamp() < oldestHFileTimeStampToKeepMVCC)) {
            // all files are being compacted, so we can calculate the smallest
            // MVCC value to keep
            if (fd.minSeqIdToKeep < file.getMaxMemstoreTS()) {
                fd.minSeqIdToKeep = file.getMaxMemstoreTS();
            }
        }
        long seqNum = file.getMaxSequenceId();
        fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
        StoreFileReader r = file.getReader();
        if (r == null) {
            LOG.warn("Null reader for " + file.getPath());
            continue;
        }
        // NOTE: use getEntries when compacting instead of getFilterEntries, otherwise under-sized
        // blooms can cause progress to be miscalculated, as can the user switching bloom
        // type (e.g. from ROW to ROWCOL)
        long keyCount = r.getEntries();
        fd.maxKeyCount += keyCount;
        // calculate the latest MVCC readpoint in any of the involved store files
        Map<byte[], byte[]> fileInfo = r.loadFileInfo();
        byte[] tmp = null;
        // Get and set the real MVCCReadpoint for bulk loaded files, which is the
        // SeqId number.
        if (r.isBulkLoaded()) {
            fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, r.getSequenceID());
        } else {
            tmp = fileInfo.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
            if (tmp != null) {
                fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
            }
        }
        tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
        if (tmp != null) {
            fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
        }
        // If required, calculate the earliest put timestamp of all involved storefiles.
        // This is used to remove family delete marker during compaction.
        long earliestPutTs = 0;
        if (allFiles) {
            tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
            if (tmp == null) {
                // There's a file with no information, must be an old one
                // assume we have very old puts
                fd.earliestPutTs = earliestPutTs = HConstants.OLDEST_TIMESTAMP;
            } else {
                earliestPutTs = Bytes.toLong(tmp);
                fd.earliestPutTs = Math.min(fd.earliestPutTs, earliestPutTs);
            }
        }
        tmp = fileInfo.get(StoreFile.TIMERANGE_KEY);
        TimeRangeTracker trt = TimeRangeTracker.getTimeRangeTracker(tmp);
        fd.latestPutTs = trt == null ? HConstants.LATEST_TIMESTAMP : trt.getMax();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Compacting " + file + ", keycount=" + keyCount + ", bloomtype=" + r.getBloomFilterType().toString() + ", size=" + TraditionalBinaryPrefix.long2String(r.length(), "", 1) + ", encoding=" + r.getHFileReader().getDataBlockEncoding() + ", seqNum=" + seqNum + (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
        }
    }
    return fd;
}
Also used: StoreFile (org.apache.hadoop.hbase.regionserver.StoreFile), TimeRangeTracker (org.apache.hadoop.hbase.regionserver.TimeRangeTracker), StoreFileReader (org.apache.hadoop.hbase.regionserver.StoreFileReader)
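This example is from an older HBase branch: TimeRangeTracker.getTimeRangeTracker(byte[]) is a null-tolerant deserialization wrapper that later branches replace with TimeRangeTracker.parseFrom(byte[]), the method used in Examples 2 and 3 below. The sketch that follows shows the same null-safe fallback written against the newer API; readLatestPutTs is a hypothetical helper, and the file-info map is assumed to be sorted with Bytes.BYTES_COMPARATOR (as loadFileInfo() returns) so that byte[] lookups work.

// Sketch only (hypothetical helper): null-safe extraction of the latest put
// timestamp from an HFile's TIMERANGE file-info entry, mirroring the fallback
// to HConstants.LATEST_TIMESTAMP in getFileDetails() above.
import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;

public final class TimeRangeFileInfo {

    private static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE");

    /** Returns the file's maximum put timestamp, or LATEST_TIMESTAMP when the entry is absent. */
    static long readLatestPutTs(Map<byte[], byte[]> fileInfo) throws IOException {
        byte[] raw = fileInfo.get(TIMERANGE_KEY);
        // Files written before the TIMERANGE entry existed carry no bound, so they
        // must be treated as if they might contain the newest possible data.
        return raw == null ? HConstants.LATEST_TIMESTAMP : TimeRangeTracker.parseFrom(raw).getMax();
    }
}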

Example 2 with TimeRangeTracker

Use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.

From the class HFilePrettyPrinter, method printMeta:

private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException {
    out.println("Block index size as per heapsize: " + reader.indexSize());
    out.println(asSeparateLines(reader.toString()));
    out.println("Trailer:\n    " + asSeparateLines(reader.getTrailer().toString()));
    out.println("Fileinfo:");
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
        out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
        if (Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY)
                || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT)
                || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS)
                || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY)
                || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS)
                || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY)) {
            out.println(Bytes.toLong(e.getValue()));
        } else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) {
            TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue());
            out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
        } else if (Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN)
                || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN)
                || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION)
                || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN)) {
            out.println(Bytes.toInt(e.getValue()));
        } else if (Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY)
                || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED)
                || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY)) {
            out.println(Bytes.toBoolean(e.getValue()));
        } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) {
            out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString());
        } else {
            out.println(Bytes.toStringBinary(e.getValue()));
        }
    }
    try {
        out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString));
    } catch (Exception e) {
        out.println("Unable to retrieve the midkey");
    }
    // Printing general bloom information
    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
    BloomFilter bloomFilter = null;
    if (bloomMeta != null)
        bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
    out.println("Bloom filter:");
    if (bloomFilter != null) {
        out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
        out.println(FOUR_SPACES + "Not present");
    }
    // Printing delete bloom information
    bloomMeta = reader.getDeleteBloomFilterMetadata();
    bloomFilter = null;
    if (bloomMeta != null)
        bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
    out.println("Delete Family Bloom filter:");
    if (bloomFilter != null) {
        out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
        out.println(FOUR_SPACES + "Not present");
    }
}
Also used: DataInput (java.io.DataInput), TimeRangeTracker (org.apache.hadoop.hbase.regionserver.TimeRangeTracker), Map (java.util.Map), SortedMap (java.util.SortedMap), HashMap (java.util.HashMap), PrivateCellUtil (org.apache.hadoop.hbase.PrivateCellUtil), CellUtil (org.apache.hadoop.hbase.CellUtil), ParseException (org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException), IOException (java.io.IOException), BloomFilter (org.apache.hadoop.hbase.util.BloomFilter)
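printMeta is private; it runs when the pretty-printer is invoked with the -m option. Below is a minimal sketch of driving the tool programmatically, assuming HFilePrettyPrinter implements the Hadoop Tool interface, as it does in recent HBase versions; the HDFS path is a placeholder, not a real file.

// Sketch only: run HFilePrettyPrinter so that printMeta() above dumps the file
// info (including the TIMERANGE entry) for a single HFile.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter;
import org.apache.hadoop.util.ToolRunner;

public class DumpHFileMeta {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // -m prints metadata via printMeta(); -f selects the HFile to inspect.
        // The path below is a placeholder.
        int rc = ToolRunner.run(conf, new HFilePrettyPrinter(),
                new String[] { "-m", "-f", "hdfs:///hbase/data/default/t/<region>/<cf>/<hfile>" });
        System.exit(rc);
    }
}

The same flags are accepted on the command line through the hbase hfile subcommand (for example, hbase hfile -m -f <path>).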

Example 3 with TimeRangeTracker

Use of org.apache.hadoop.hbase.regionserver.TimeRangeTracker in project hbase by apache.

From the class TestHFileOutputFormat2, method test_TIMERANGE:

/*
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
        // build a record writer using HFileOutputFormat2
        Job job = new Job(conf);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        // Pass two key values with explicit timestamps
        final byte[] b = Bytes.toBytes("b");
        // value 1 with timestamp 2000
        KeyValue kv = new KeyValue(b, b, b, 2000, b);
        KeyValue original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertEquals(original, kv);
        // value 2 with timestamp 1000
        kv = new KeyValue(b, b, b, 1000, b);
        original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertEquals(original, kv);
        // verify that the file has the proper FileInfo.
        writer.close(context);
        // the generated file lives 1 directory down from the attempt directory
        // and is the only file, e.g.
        // _attempt__0000_r_000000_0/b/1979617994050536795
        FileSystem fs = FileSystem.get(conf);
        Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
        FileStatus[] sub1 = fs.listStatus(attemptDirectory);
        FileStatus[] file = fs.listStatus(sub1[0].getPath());
        // open as HFile Reader and pull out TIMERANGE FileInfo.
        HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
        Map<byte[], byte[]> finfo = rd.getHFileInfo();
        byte[] range = finfo.get(Bytes.toBytes("TIMERANGE"));
        assertNotNull(range);
        // unmarshall and check values.
        TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(range);
        LOG.info(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
        assertEquals(1000, timeRangeTracker.getMin());
        assertEquals(2000, timeRangeTracker.getMax());
        rd.close();
    } finally {
        if (writer != null && context != null)
            writer.close(context);
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), KeyValue (org.apache.hadoop.hbase.KeyValue), FileStatus (org.apache.hadoop.fs.FileStatus), LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus), HdfsFileStatus (org.apache.hadoop.hdfs.protocol.HdfsFileStatus), Configuration (org.apache.hadoop.conf.Configuration), HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Reader (org.apache.hadoop.hbase.io.hfile.HFile.Reader), FileSystem (org.apache.hadoop.fs.FileSystem), TestHRegionFileSystem (org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), TimeRangeTracker (org.apache.hadoop.hbase.regionserver.TimeRangeTracker), Job (org.apache.hadoop.mapreduce.Job), HFile (org.apache.hadoop.hbase.io.hfile.HFile), Cell (org.apache.hadoop.hbase.Cell), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig), Ignore (org.junit.Ignore), Test (org.junit.Test)
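The round trip this test exercises through HFileOutputFormat2 can be reproduced with TimeRangeTracker alone. A minimal sketch, assuming the HBase 2.x API (TimeRangeTracker.create(Type), includeTimestamp(Cell), toByteArray(), and the parseFrom() seen above); it reuses the test's 1000/2000 timestamps.

// Sketch only, assuming the HBase 2.x TimeRangeTracker API: build a tracker,
// serialize it the way a store-file writer populates the TIMERANGE entry, and
// parse it back, reproducing the 1000....2000 range asserted in the test above.
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;

public class TimeRangeTrackerRoundTrip {
    public static void main(String[] args) throws Exception {
        byte[] b = Bytes.toBytes("b");
        // NON_SYNC is the single-writer flavor; SYNC exists for concurrent writers.
        TimeRangeTracker trt = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
        trt.includeTimestamp(new KeyValue(b, b, b, 2000L, b));
        trt.includeTimestamp(new KeyValue(b, b, b, 1000L, b));
        byte[] serialized = TimeRangeTracker.toByteArray(trt);
        TimeRangeTracker parsed = TimeRangeTracker.parseFrom(serialized);
        System.out.println(parsed.getMin() + "...." + parsed.getMax()); // 1000....2000
    }
}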

Aggregations

TimeRangeTracker (org.apache.hadoop.hbase.regionserver.TimeRangeTracker): 3
DataInput (java.io.DataInput): 1
IOException (java.io.IOException): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
SortedMap (java.util.SortedMap): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
FileStatus (org.apache.hadoop.fs.FileStatus): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 1
Path (org.apache.hadoop.fs.Path): 1
Cell (org.apache.hadoop.hbase.Cell): 1
CellUtil (org.apache.hadoop.hbase.CellUtil): 1
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 1
KeyValue (org.apache.hadoop.hbase.KeyValue): 1
PrivateCellUtil (org.apache.hadoop.hbase.PrivateCellUtil): 1
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 1
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 1
HFile (org.apache.hadoop.hbase.io.hfile.HFile): 1
Reader (org.apache.hadoop.hbase.io.hfile.HFile.Reader): 1