Example 1 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class Compactor method getFileDetails.

/**
 * Extracts some details about the files to compact that are commonly needed by compactors.
 * @param filesToCompact Files.
 * @param allFiles Whether all files are included for compaction
 * @param major If major compaction
 * @return The result.
 */
private FileDetails getFileDetails(Collection<HStoreFile> filesToCompact, boolean allFiles, boolean major) throws IOException {
    FileDetails fd = new FileDetails();
    long oldestHFileTimestampToKeepMVCC = EnvironmentEdgeManager.currentTime() - (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod);
    for (HStoreFile file : filesToCompact) {
        if (allFiles && (file.getModificationTimestamp() < oldestHFileTimestampToKeepMVCC)) {
            // when allFiles is true, all files are compacted, so we can calculate the smallest
            // MVCC value to keep
            if (fd.minSeqIdToKeep < file.getMaxMemStoreTS()) {
                fd.minSeqIdToKeep = file.getMaxMemStoreTS();
            }
        }
        long seqNum = file.getMaxSequenceId();
        fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
        StoreFileReader r = file.getReader();
        if (r == null) {
            LOG.warn("Null reader for " + file.getPath());
            continue;
        }
        // NOTE: use getEntries when compacting instead of getFilterEntries, otherwise under-sized
        // blooms can cause progress to be miscalculated or if the user switches bloom
        // type (e.g. from ROW to ROWCOL)
        long keyCount = r.getEntries();
        fd.maxKeyCount += keyCount;
        // calculate the latest MVCC readpoint in any of the involved store files
        Map<byte[], byte[]> fileInfo = r.loadFileInfo();
        // calculate the total size of the compacted files
        fd.totalCompactedFilesSize += r.length();
        byte[] tmp = null;
        // For bulk loaded files, the real MVCC read point is the SeqId number.
        if (r.isBulkLoaded()) {
            fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, r.getSequenceID());
        } else {
            tmp = fileInfo.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
            if (tmp != null) {
                fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
            }
        }
        tmp = fileInfo.get(HFileInfo.MAX_TAGS_LEN);
        if (tmp != null) {
            fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
        }
        // If required, calculate the earliest put timestamp of all involved storefiles.
        // This is used to remove family delete marker during compaction.
        long earliestPutTs = 0;
        if (allFiles) {
            tmp = fileInfo.get(EARLIEST_PUT_TS);
            if (tmp == null) {
                // There's a file with no information, must be an old one
                // assume we have very old puts
                fd.earliestPutTs = earliestPutTs = PrivateConstants.OLDEST_TIMESTAMP;
            } else {
                earliestPutTs = Bytes.toLong(tmp);
                fd.earliestPutTs = Math.min(fd.earliestPutTs, earliestPutTs);
            }
        }
        tmp = fileInfo.get(TIMERANGE_KEY);
        fd.latestPutTs = tmp == null ? HConstants.LATEST_TIMESTAMP : TimeRangeTracker.parseFrom(tmp).getMax();
        LOG.debug("Compacting {}, keycount={}, bloomtype={}, size={}, " + "encoding={}, compression={}, seqNum={}{}", (file.getPath() == null ? null : file.getPath().getName()), keyCount, r.getBloomFilterType().toString(), TraditionalBinaryPrefix.long2String(r.length(), "", 1), r.getHFileReader().getDataBlockEncoding(), major ? majorCompactionCompression : minorCompactionCompression, seqNum, (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
    }
    return fd;
}
Also used : HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) StoreFileReader(org.apache.hadoop.hbase.regionserver.StoreFileReader)
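
The MVCC-preservation cutoff above is simple arithmetic: the current time minus keepSeqIdPeriod days, with files older than that cutoff raising the minimum sequence id that must be kept. Below is a minimal standalone sketch of just that calculation, ignoring the allFiles flag for brevity and assuming plain longs in place of HStoreFile and EnvironmentEdgeManager; the class and helper names are hypothetical, not part of HBase.

// Illustrative sketch only: plain (modificationTime, maxMemStoreTs) pairs stand in for HStoreFile metadata.
public class MvccCutoffSketch {

    /** Smallest MVCC value to keep, given per-file metadata and a keep period in days. */
    static long minSeqIdToKeep(long now, int keepSeqIdPeriodDays, long[][] modTimeAndMaxMemstoreTs) {
        long cutoff = now - 1000L * 60 * 60 * 24 * keepSeqIdPeriodDays;
        long minSeqIdToKeep = 0;
        for (long[] file : modTimeAndMaxMemstoreTs) {
            long modificationTime = file[0];
            long maxMemStoreTs = file[1];
            // Only files older than the cutoff may have their MVCC info dropped during the
            // compaction, so their max memstore TS raises the floor that must be preserved.
            if (modificationTime < cutoff) {
                minSeqIdToKeep = Math.max(minSeqIdToKeep, maxMemStoreTs);
            }
        }
        return minSeqIdToKeep;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        long tenDaysOld = now - 10L * 24 * 60 * 60 * 1000;
        long oneDayOld = now - 1L * 24 * 60 * 60 * 1000;
        // Prints 42: only the 10-day-old file is past the 5-day keep period.
        System.out.println(minSeqIdToKeep(now, 5, new long[][] { { tenDaysOld, 42 }, { oneDayOld, 99 } }));
    }
}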

Example 2 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class DateTieredCompactionPolicy method selectMinorCompaction.

/**
 * We receive store files sorted in ascending order by seqId, then scan the list of files. If the
 * current file has a maxTimestamp older than the last known maximum, treat this file as if it
 * carried the last known maximum, so that both seqId and timestamp are in the same order. If files
 * carry the same maxTimestamp, they are ordered by seqId. We then reverse the list so the files are
 * ordered by seqId and maxTimestamp in descending order, and build the time windows. All
 * out-of-order data falls into the same compaction window, guaranteeing contiguous compaction based
 * on sequence id.
 */
public CompactionRequestImpl selectMinorCompaction(ArrayList<HStoreFile> candidateSelection, boolean mayUseOffPeak, boolean mayBeStuck) throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    long oldestToCompact = getOldestToCompact(comConf.getDateTieredMaxStoreFileAgeMillis(), now);
    List<Pair<HStoreFile, Long>> storefileMaxTimestampPairs = Lists.newArrayListWithCapacity(candidateSelection.size());
    long maxTimestampSeen = Long.MIN_VALUE;
    for (HStoreFile storeFile : candidateSelection) {
        // if there is out-of-order data,
        // we put them in the same window as the last file in increasing order
        maxTimestampSeen = Math.max(maxTimestampSeen, storeFile.getMaximumTimestamp().orElse(Long.MIN_VALUE));
        storefileMaxTimestampPairs.add(new Pair<>(storeFile, maxTimestampSeen));
    }
    Collections.reverse(storefileMaxTimestampPairs);
    CompactionWindow window = getIncomingWindow(now);
    int minThreshold = comConf.getDateTieredIncomingWindowMin();
    PeekingIterator<Pair<HStoreFile, Long>> it = Iterators.peekingIterator(storefileMaxTimestampPairs.iterator());
    while (it.hasNext()) {
        if (window.compareToTimestamp(oldestToCompact) < 0) {
            break;
        }
        int compResult = window.compareToTimestamp(it.peek().getSecond());
        if (compResult > 0) {
            // If the file is too old for the window, switch to the next window
            window = window.nextEarlierWindow();
            minThreshold = comConf.getMinFilesToCompact();
        } else {
            // The file is within the target window
            ArrayList<HStoreFile> fileList = Lists.newArrayList();
            // we tolerate files with future data although it is sub-optimal
            while (it.hasNext() && window.compareToTimestamp(it.peek().getSecond()) <= 0) {
                fileList.add(it.next().getFirst());
            }
            if (fileList.size() >= minThreshold) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Processing files: " + fileList + " for window: " + window);
                }
                DateTieredCompactionRequest request = generateCompactionRequest(fileList, window, mayUseOffPeak, mayBeStuck, minThreshold, now);
                if (request != null) {
                    return request;
                }
            }
        }
    }
    // A non-null file list is expected by HStore
    return new CompactionRequestImpl(Collections.emptyList());
}
Also used : HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) Pair(org.apache.hadoop.hbase.util.Pair)
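
The key trick described in the Javadoc above is the monotonic normalization of timestamps: each file, scanned in ascending seqId order, is paired with the maximum timestamp seen so far, so out-of-order files land in the same window as their newer predecessors. Here is a small standalone sketch of just that pairing, assuming a hypothetical FileInfo record in place of HStoreFile; it is not the HBase API.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Illustrative sketch only: FileInfo stands in for HStoreFile metadata.
public class TimestampNormalizationSketch {

    record FileInfo(long seqId, long maxTimestamp) {}

    /** Pairs each file (already sorted ascending by seqId) with a carried-forward maximum timestamp. */
    static List<long[]> normalize(List<FileInfo> sortedBySeqId) {
        List<long[]> pairs = new ArrayList<>(sortedBySeqId.size());
        long maxTimestampSeen = Long.MIN_VALUE;
        for (FileInfo f : sortedBySeqId) {
            // An out-of-order (older) timestamp is replaced by the last known maximum,
            // so seqId order and timestamp order never disagree.
            maxTimestampSeen = Math.max(maxTimestampSeen, f.maxTimestamp());
            pairs.add(new long[] { f.seqId(), maxTimestampSeen });
        }
        Collections.reverse(pairs); // newest first, as the window scan expects
        return pairs;
    }

    public static void main(String[] args) {
        List<FileInfo> files = List.of(new FileInfo(1, 100), new FileInfo(2, 90), new FileInfo(3, 120));
        // Prints 3 -> 120, 2 -> 100, 1 -> 100: file 2's out-of-order timestamp 90 is bumped to 100.
        normalize(files).forEach(p -> System.out.println(p[0] + " -> " + p[1]));
    }
}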

Example 3 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class ExploringCompactionPolicy method filesInRatio.

/**
 * Check that all files satisfy the constraint
 *      FileSize(i) <= ( Sum(0,N,FileSize(_)) - FileSize(i) ) * Ratio.
 *
 * @param files List of store files to consider as a compaction candidate.
 * @param currentRatio The ratio to use.
 * @return true if all files satisfy the ratio constraint, false otherwise.
 */
private boolean filesInRatio(List<HStoreFile> files, double currentRatio) {
    if (files.size() < 2) {
        return true;
    }
    long totalFileSize = getTotalStoreSize(files);
    for (HStoreFile file : files) {
        long singleFileSize = file.getReader().length();
        long sumAllOtherFileSizes = totalFileSize - singleFileSize;
        if (singleFileSize > sumAllOtherFileSizes * currentRatio) {
            return false;
        }
    }
    return true;
}
Also used : HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile)
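
The constraint in the Javadoc is easy to check by hand: with sizes 10, 20 and 50 and a ratio of 1.2, the largest file fails because 50 > (10 + 20) * 1.2 = 36. Below is a minimal standalone sketch of the same check, assuming plain file sizes instead of HStoreFile readers; the class name is hypothetical.

import java.util.List;

// Illustrative sketch only: plain sizes stand in for HStoreFile reader lengths.
public class RatioCheckSketch {

    /** True when every file is no larger than (sum of the other files) * ratio. */
    static boolean filesInRatio(List<Long> fileSizes, double ratio) {
        if (fileSizes.size() < 2) {
            return true;
        }
        long total = fileSizes.stream().mapToLong(Long::longValue).sum();
        for (long size : fileSizes) {
            if (size > (total - size) * ratio) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(filesInRatio(List.of(10L, 20L, 30L), 1.2)); // true: 30 <= (10 + 20) * 1.2
        System.out.println(filesInRatio(List.of(10L, 20L, 50L), 1.2)); // false: 50 > (10 + 20) * 1.2 = 36
    }
}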

Example 4 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class FIFOCompactionPolicy method getExpiredStores.

private Collection<HStoreFile> getExpiredStores(Collection<HStoreFile> files, Collection<HStoreFile> filesCompacting) {
    long currentTime = EnvironmentEdgeManager.currentTime();
    Collection<HStoreFile> expiredStores = new ArrayList<>();
    for (HStoreFile sf : files) {
        if (isEmptyStoreFile(sf) && !filesCompacting.contains(sf)) {
            expiredStores.add(sf);
            continue;
        }
        // The MIN_VERSIONS check is handled in HStore#removeUnneededFiles.
        long maxTs = sf.getReader().getMaxTimestamp();
        long maxTtl = storeConfigInfo.getStoreFileTtl();
        if (maxTtl == Long.MAX_VALUE || (currentTime - maxTtl < maxTs)) {
            continue;
        } else if (filesCompacting == null || !filesCompacting.contains(sf)) {
            expiredStores.add(sf);
        }
    }
    return expiredStores;
}
Also used : ArrayList(java.util.ArrayList) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile)
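
Stripped of the empty-file and filesCompacting handling, the expiry test above reduces to: a file is expired when a finite TTL is configured and its newest cell timestamp is at or before now minus the TTL. Here is a standalone sketch of that predicate, assuming plain timestamps in place of the store file reader; the class name is hypothetical.

// Illustrative sketch only: the real policy reads maxTimestamp from the store file reader.
public class TtlExpirySketch {

    /** True when the file's newest cell is older than the configured TTL. */
    static boolean isExpired(long maxTimestamp, long ttlMillis, long now) {
        if (ttlMillis == Long.MAX_VALUE) {
            return false; // TTL disabled: nothing ever expires
        }
        return now - ttlMillis >= maxTimestamp;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        long oneHour = 60L * 60 * 1000;
        System.out.println(isExpired(now - 2 * oneHour, oneHour, now));        // true: newest cell is 2h old
        System.out.println(isExpired(now - 2 * oneHour, Long.MAX_VALUE, now)); // false: TTL disabled
    }
}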

Example 5 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class StripeCompactionPolicy method selectSingleStripeCompaction.

protected StripeCompactionRequest selectSingleStripeCompaction(StripeInformationProvider si, boolean includeL0, boolean canDropDeletesWithoutL0, boolean isOffpeak) throws IOException {
    ArrayList<ImmutableList<HStoreFile>> stripes = si.getStripes();
    int bqIndex = -1;
    List<HStoreFile> bqSelection = null;
    int stripeCount = stripes.size();
    long bqTotalSize = -1;
    for (int i = 0; i < stripeCount; ++i) {
        // If we want to compact L0 to drop deletes, we only want whole-stripe compactions.
        // So, pass includeL0 as 2nd parameter to indicate that.
        List<HStoreFile> selection = selectSimpleCompaction(stripes.get(i), !canDropDeletesWithoutL0 && includeL0, isOffpeak, false);
        if (selection.isEmpty()) {
            continue;
        }
        long size = 0;
        for (HStoreFile sf : selection) {
            size += sf.getReader().length();
        }
        if (bqSelection == null || selection.size() > bqSelection.size() || (selection.size() == bqSelection.size() && size < bqTotalSize)) {
            bqSelection = selection;
            bqIndex = i;
            bqTotalSize = size;
        }
    }
    if (bqSelection == null) {
        LOG.debug("No good compaction is possible in any stripe");
        return null;
    }
    List<HStoreFile> filesToCompact = new ArrayList<>(bqSelection);
    // See if we can, and need to, split this stripe.
    int targetCount = 1;
    long targetKvs = Long.MAX_VALUE;
    boolean hasAllFiles = filesToCompact.size() == stripes.get(bqIndex).size();
    String splitString = "";
    if (hasAllFiles && bqTotalSize >= config.getSplitSize()) {
        if (includeL0) {
            // So, if we might split, don't compact the stripe with L0.
            return null;
        }
        Pair<Long, Integer> kvsAndCount = estimateTargetKvs(filesToCompact, config.getSplitCount());
        targetKvs = kvsAndCount.getFirst();
        targetCount = kvsAndCount.getSecond();
        splitString = "; the stripe will be split into at most " + targetCount + " stripes with " + targetKvs + " target KVs";
    }
    LOG.debug("Found compaction in a stripe with end key [" + Bytes.toString(si.getEndRow(bqIndex)) + "], with " + filesToCompact.size() + " files of total size " + bqTotalSize + splitString);
    // See if we can drop deletes.
    StripeCompactionRequest req;
    if (includeL0) {
        assert hasAllFiles;
        List<HStoreFile> l0Files = si.getLevel0Files();
        LOG.debug("Adding " + l0Files.size() + " files to compaction to be able to drop deletes");
        ConcatenatedLists<HStoreFile> sfs = new ConcatenatedLists<>();
        sfs.addSublist(filesToCompact);
        sfs.addSublist(l0Files);
        req = new BoundaryStripeCompactionRequest(sfs, si.getStripeBoundaries());
    } else {
        req = new SplitStripeCompactionRequest(filesToCompact, si.getStartRow(bqIndex), si.getEndRow(bqIndex), targetCount, targetKvs);
    }
    if (hasAllFiles && (canDropDeletesWithoutL0 || includeL0)) {
        req.setMajorRange(si.getStartRow(bqIndex), si.getEndRow(bqIndex));
    }
    req.getRequest().setOffPeak(isOffpeak);
    return req;
}
Also used : ImmutableList(org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) ConcatenatedLists(org.apache.hadoop.hbase.util.ConcatenatedLists)
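
The selection loop above prefers the stripe whose candidate selection contains the most files, breaking ties in favor of the smaller total size. Here is a condensed standalone sketch of just that "best queue" comparison, assuming a hypothetical Candidate record instead of per-stripe HStoreFile lists; it is not the HBase API.

import java.util.List;

// Illustrative sketch only: Candidate stands in for one stripe's selected files.
public class BestStripeSketch {

    record Candidate(int stripeIndex, int fileCount, long totalSize) {}

    /** Returns the candidate with the most files; ties go to the smaller total size. */
    static Candidate pickBest(List<Candidate> candidates) {
        Candidate best = null;
        for (Candidate c : candidates) {
            if (c.fileCount() == 0) {
                continue; // nothing selectable in this stripe
            }
            if (best == null || c.fileCount() > best.fileCount()
                || (c.fileCount() == best.fileCount() && c.totalSize() < best.totalSize())) {
                best = c;
            }
        }
        return best; // null means no stripe offers a useful compaction
    }

    public static void main(String[] args) {
        Candidate best = pickBest(List.of(
            new Candidate(0, 3, 900), new Candidate(1, 3, 600), new Candidate(2, 2, 100)));
        System.out.println(best); // Candidate[stripeIndex=1, fileCount=3, totalSize=600]
    }
}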

Aggregations

HStoreFile (org.apache.hadoop.hbase.regionserver.HStoreFile): 44 usages
ArrayList (java.util.ArrayList): 18 usages
Test (org.junit.Test): 16 usages
Path (org.apache.hadoop.fs.Path): 11 usages
Configuration (org.apache.hadoop.conf.Configuration): 8 usages
HStore (org.apache.hadoop.hbase.regionserver.HStore): 8 usages
StripeInformationProvider (org.apache.hadoop.hbase.regionserver.compactions.StripeCompactionPolicy.StripeInformationProvider): 8 usages
IOException (java.io.IOException): 6 usages
OptionalLong (java.util.OptionalLong): 6 usages
TableName (org.apache.hadoop.hbase.TableName): 5 usages
Put (org.apache.hadoop.hbase.client.Put): 5 usages
TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor): 5 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 4 usages
StoreFileReader (org.apache.hadoop.hbase.regionserver.StoreFileReader): 4 usages
ImmutableList (org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList): 4 usages
InterruptedIOException (java.io.InterruptedIOException): 3 usages
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor): 3 usages
ManualEnvironmentEdge (org.apache.hadoop.hbase.util.ManualEnvironmentEdge): 3 usages
FileNotFoundException (java.io.FileNotFoundException): 2 usages