Use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.
The class Compactor, method getFileDetails.
/**
 * Extracts some details about the files to compact that are commonly needed by compactors.
 * @param filesToCompact Files.
 * @param allFiles Whether all files are included for compaction
 * @param major If major compaction
 * @return The result.
 */
private FileDetails getFileDetails(Collection<HStoreFile> filesToCompact, boolean allFiles,
    boolean major) throws IOException {
  FileDetails fd = new FileDetails();
  long oldestHFileTimestampToKeepMVCC =
      EnvironmentEdgeManager.currentTime() - (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod);
  for (HStoreFile file : filesToCompact) {
    if (allFiles && (file.getModificationTimestamp() < oldestHFileTimestampToKeepMVCC)) {
      // MVCC value to keep
      if (fd.minSeqIdToKeep < file.getMaxMemStoreTS()) {
        fd.minSeqIdToKeep = file.getMaxMemStoreTS();
      }
    }
    long seqNum = file.getMaxSequenceId();
    fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
    StoreFileReader r = file.getReader();
    if (r == null) {
      LOG.warn("Null reader for " + file.getPath());
      continue;
    }
    // NOTE: use getEntries when compacting instead of getFilterEntries, otherwise under-sized
    // blooms can cause progress to be miscalculated or if the user switches bloom
    // type (e.g. from ROW to ROWCOL)
    long keyCount = r.getEntries();
    fd.maxKeyCount += keyCount;
    // calculate the latest MVCC readpoint in any of the involved store files
    Map<byte[], byte[]> fileInfo = r.loadFileInfo();
    // calculate the total size of the compacted files
    fd.totalCompactedFilesSize += r.length();
    byte[] tmp = null;
    // For bulk loaded files, the real MVCC readpoint is the file's SeqId number.
    if (r.isBulkLoaded()) {
      fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, r.getSequenceID());
    } else {
      tmp = fileInfo.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
      if (tmp != null) {
        fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
      }
    }
    tmp = fileInfo.get(HFileInfo.MAX_TAGS_LEN);
    if (tmp != null) {
      fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
    }
    // If required, calculate the earliest put timestamp of all involved storefiles.
    // This is used to remove family delete marker during compaction.
    long earliestPutTs = 0;
    if (allFiles) {
      tmp = fileInfo.get(EARLIEST_PUT_TS);
      if (tmp == null) {
        // There's a file with no information, must be an old one
        // assume we have very old puts
        fd.earliestPutTs = earliestPutTs = PrivateConstants.OLDEST_TIMESTAMP;
      } else {
        earliestPutTs = Bytes.toLong(tmp);
        fd.earliestPutTs = Math.min(fd.earliestPutTs, earliestPutTs);
      }
    }
    tmp = fileInfo.get(TIMERANGE_KEY);
    fd.latestPutTs =
        tmp == null ? HConstants.LATEST_TIMESTAMP : TimeRangeTracker.parseFrom(tmp).getMax();
    LOG.debug("Compacting {}, keycount={}, bloomtype={}, size={}, "
        + "encoding={}, compression={}, seqNum={}{}",
      (file.getPath() == null ? null : file.getPath().getName()), keyCount,
      r.getBloomFilterType().toString(), TraditionalBinaryPrefix.long2String(r.length(), "", 1),
      r.getHFileReader().getDataBlockEncoding(),
      major ? majorCompactionCompression : minorCompactionCompression, seqNum,
      (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
  }
  return fd;
}
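To make the MVCC-keep arithmetic in the allFiles branch easier to follow, here is a minimal, self-contained sketch of the same cutoff logic. It is not HBase code: each long[] is a hypothetical {modificationTimestamp, maxMemStoreTS} pair standing in for HStoreFile, and keepSeqIdPeriodDays stands in for this.keepSeqIdPeriod.

// Illustrative sketch (not HBase code): how the MVCC-keep cutoff behaves.
import java.util.List;

public class MvccKeepCutoffSketch {
  // Files last modified before (now - keepSeqIdPeriodDays) are old enough that their
  // memstore timestamps no longer need preserving; the compactor keeps the largest
  // such value as the minimum sequence id it must still retain.
  static long minSeqIdToKeep(List<long[]> files, long now, int keepSeqIdPeriodDays) {
    long cutoff = now - 1000L * 60 * 60 * 24 * keepSeqIdPeriodDays;
    long minSeqIdToKeep = 0;
    for (long[] f : files) {
      if (f[0] < cutoff && f[1] > minSeqIdToKeep) {
        minSeqIdToKeep = f[1];
      }
    }
    return minSeqIdToKeep;
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    long week = 7L * 24 * 60 * 60 * 1000;
    List<long[]> files = List.of(
      new long[] {now - 2 * week, 1200},   // old file: its MVCC can be collapsed
      new long[] {now - 10_000, 5000});    // recent file: ignored for the cutoff
    // With a keep period of 5 days, only the two-week-old file is past the cutoff.
    System.out.println(minSeqIdToKeep(files, now, 5));  // prints 1200
  }
}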
Use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.
The class DateTieredCompactionPolicy, method selectMinorCompaction.
/**
 * We receive store files sorted in ascending order by seqId then scan the list of files. If the
 * current file has a maxTimestamp older than last known maximum, treat this file as it carries
 * the last known maximum. This way both seqId and timestamp are in the same order. If files carry
 * the same maxTimestamps, they are ordered by seqId. We then reverse the list so they are ordered
 * by seqId and maxTimestamp in descending order and build the time windows. All the out-of-order
 * data falls into the same compaction windows, guaranteeing contiguous compaction based on
 * sequence id.
 */
public CompactionRequestImpl selectMinorCompaction(ArrayList<HStoreFile> candidateSelection,
    boolean mayUseOffPeak, boolean mayBeStuck) throws IOException {
  long now = EnvironmentEdgeManager.currentTime();
  long oldestToCompact = getOldestToCompact(comConf.getDateTieredMaxStoreFileAgeMillis(), now);
  List<Pair<HStoreFile, Long>> storefileMaxTimestampPairs =
      Lists.newArrayListWithCapacity(candidateSelection.size());
  long maxTimestampSeen = Long.MIN_VALUE;
  for (HStoreFile storeFile : candidateSelection) {
    // if there is out-of-order data,
    // we put them in the same window as the last file in increasing order
    maxTimestampSeen =
        Math.max(maxTimestampSeen, storeFile.getMaximumTimestamp().orElse(Long.MIN_VALUE));
    storefileMaxTimestampPairs.add(new Pair<>(storeFile, maxTimestampSeen));
  }
  Collections.reverse(storefileMaxTimestampPairs);
  CompactionWindow window = getIncomingWindow(now);
  int minThreshold = comConf.getDateTieredIncomingWindowMin();
  PeekingIterator<Pair<HStoreFile, Long>> it =
      Iterators.peekingIterator(storefileMaxTimestampPairs.iterator());
  while (it.hasNext()) {
    if (window.compareToTimestamp(oldestToCompact) < 0) {
      break;
    }
    int compResult = window.compareToTimestamp(it.peek().getSecond());
    if (compResult > 0) {
      // If the file is too old for the window, switch to the next window
      window = window.nextEarlierWindow();
      minThreshold = comConf.getMinFilesToCompact();
    } else {
      // The file is within the target window
      ArrayList<HStoreFile> fileList = Lists.newArrayList();
      // we tolerate files with future data although it is sub-optimal
      while (it.hasNext() && window.compareToTimestamp(it.peek().getSecond()) <= 0) {
        fileList.add(it.next().getFirst());
      }
      if (fileList.size() >= minThreshold) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Processing files: " + fileList + " for window: " + window);
        }
        DateTieredCompactionRequest request = generateCompactionRequest(fileList, window,
            mayUseOffPeak, mayBeStuck, minThreshold, now);
        if (request != null) {
          return request;
        }
      }
    }
  }
  // A non-null file list is expected by HStore
  return new CompactionRequestImpl(Collections.emptyList());
}
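The pairing-and-reverse step the javadoc describes can be isolated into a small sketch. This is illustrative code only, reducing each file to its max timestamp; it is not part of DateTieredCompactionPolicy.

// Illustrative sketch (not HBase code) of the pairing step described in the javadoc:
// each file keeps the largest max-timestamp seen so far, so out-of-order files are
// pulled forward into the same window as the newer file that precedes them by seqId.
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class RunningMaxTimestampSketch {
  public static void main(String[] args) {
    // Max timestamps of files in ascending seqId order; the third file is out of order.
    long[] maxTimestamps = {100, 250, 180, 400};
    List<long[]> pairs = new ArrayList<>();  // {originalMaxTs, effectiveMaxTs}
    long maxSeen = Long.MIN_VALUE;
    for (long ts : maxTimestamps) {
      maxSeen = Math.max(maxSeen, ts);
      pairs.add(new long[] {ts, maxSeen});
    }
    // Reverse so both seqId and effective timestamp decrease, as the window scan expects.
    Collections.reverse(pairs);
    for (long[] p : pairs) {
      System.out.println("maxTs=" + p[0] + " -> window timestamp " + p[1]);
    }
    // Prints: 400 -> 400, 180 -> 250, 250 -> 250, 100 -> 100
  }
}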
Use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.
The class ExploringCompactionPolicy, method filesInRatio.
/**
 * Check that all files satisfy the constraint
 * FileSize(i) <= ( Sum(0,N,FileSize(_)) - FileSize(i) ) * Ratio.
 *
 * @param files List of store files to consider as a compaction candidate.
 * @param currentRatio The ratio to use.
 * @return true if these files satisfy the ratio constraint.
 */
private boolean filesInRatio(List<HStoreFile> files, double currentRatio) {
  if (files.size() < 2) {
    return true;
  }
  long totalFileSize = getTotalStoreSize(files);
  for (HStoreFile file : files) {
    long singleFileSize = file.getReader().length();
    long sumAllOtherFileSizes = totalFileSize - singleFileSize;
    if (singleFileSize > sumAllOtherFileSizes * currentRatio) {
      return false;
    }
  }
  return true;
}
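A compact way to see the constraint from the javadoc in action, using plain sizes instead of HStoreFile readers. This is an illustrative sketch, not part of ExploringCompactionPolicy.

// Illustrative sketch (not HBase code) of the same ratio check over raw sizes:
// every file must be no larger than currentRatio times the sum of the other files.
public class FilesInRatioSketch {
  static boolean filesInRatio(long[] sizes, double ratio) {
    if (sizes.length < 2) {
      return true;
    }
    long total = 0;
    for (long s : sizes) {
      total += s;
    }
    for (long s : sizes) {
      // FileSize(i) <= (Sum - FileSize(i)) * Ratio
      if (s > (total - s) * ratio) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(filesInRatio(new long[] {10, 20, 30}, 1.2));  // true: 30 <= 30 * 1.2
    System.out.println(filesInRatio(new long[] {10, 20, 60}, 1.2));  // false: 60 > 30 * 1.2
  }
}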
Use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.
The class FIFOCompactionPolicy, method getExpiredStores.
private Collection<HStoreFile> getExpiredStores(Collection<HStoreFile> files,
    Collection<HStoreFile> filesCompacting) {
  long currentTime = EnvironmentEdgeManager.currentTime();
  Collection<HStoreFile> expiredStores = new ArrayList<>();
  for (HStoreFile sf : files) {
    if (isEmptyStoreFile(sf) && !filesCompacting.contains(sf)) {
      expiredStores.add(sf);
      continue;
    }
    // Check MIN_VERSIONS is in HStore removeUnneededFiles
    long maxTs = sf.getReader().getMaxTimestamp();
    long maxTtl = storeConfigInfo.getStoreFileTtl();
    if (maxTtl == Long.MAX_VALUE || (currentTime - maxTtl < maxTs)) {
      continue;
    } else if (filesCompacting == null || !filesCompacting.contains(sf)) {
      expiredStores.add(sf);
    }
  }
  return expiredStores;
}
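The TTL test in the loop reduces to a small predicate. The sketch below is illustrative only: isWholeFileExpired is a hypothetical helper, with maxTs, ttlMillis and now standing in for the reader's max timestamp, the store TTL and EnvironmentEdgeManager.currentTime().

// Illustrative sketch (not HBase code) of the TTL test applied above: a file can be
// dropped once even its newest cell (maxTs) is older than the store's TTL.
public class TtlExpirySketch {
  static boolean isWholeFileExpired(long maxTs, long ttlMillis, long now) {
    if (ttlMillis == Long.MAX_VALUE) {
      return false;  // TTL disabled, nothing ever expires
    }
    // Mirrors the policy's condition: keep the file while (now - ttl) < maxTs.
    return now - ttlMillis >= maxTs;
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    long oneHour = 60L * 60 * 1000;
    System.out.println(isWholeFileExpired(now - 2 * oneHour, oneHour, now));  // true
    System.out.println(isWholeFileExpired(now - 10_000, oneHour, now));       // false
  }
}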
Use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.
The class StripeCompactionPolicy, method selectSingleStripeCompaction.
protected StripeCompactionRequest selectSingleStripeCompaction(StripeInformationProvider si,
    boolean includeL0, boolean canDropDeletesWithoutL0, boolean isOffpeak) throws IOException {
  ArrayList<ImmutableList<HStoreFile>> stripes = si.getStripes();
  int bqIndex = -1;
  List<HStoreFile> bqSelection = null;
  int stripeCount = stripes.size();
  long bqTotalSize = -1;
  for (int i = 0; i < stripeCount; ++i) {
    // If we want to compact L0 to drop deletes, we only want whole-stripe compactions.
    // So, pass includeL0 as 2nd parameter to indicate that.
    List<HStoreFile> selection = selectSimpleCompaction(stripes.get(i),
        !canDropDeletesWithoutL0 && includeL0, isOffpeak, false);
    if (selection.isEmpty()) continue;
    long size = 0;
    for (HStoreFile sf : selection) {
      size += sf.getReader().length();
    }
    if (bqSelection == null || selection.size() > bqSelection.size()
        || (selection.size() == bqSelection.size() && size < bqTotalSize)) {
      bqSelection = selection;
      bqIndex = i;
      bqTotalSize = size;
    }
  }
  if (bqSelection == null) {
    LOG.debug("No good compaction is possible in any stripe");
    return null;
  }
  List<HStoreFile> filesToCompact = new ArrayList<>(bqSelection);
  // See if we can, and need to, split this stripe.
  int targetCount = 1;
  long targetKvs = Long.MAX_VALUE;
  boolean hasAllFiles = filesToCompact.size() == stripes.get(bqIndex).size();
  String splitString = "";
  if (hasAllFiles && bqTotalSize >= config.getSplitSize()) {
    if (includeL0) {
      // So, if we might split, don't compact the stripe with L0.
      return null;
    }
    Pair<Long, Integer> kvsAndCount = estimateTargetKvs(filesToCompact, config.getSplitCount());
    targetKvs = kvsAndCount.getFirst();
    targetCount = kvsAndCount.getSecond();
    splitString = "; the stripe will be split into at most " + targetCount + " stripes with "
        + targetKvs + " target KVs";
  }
  LOG.debug("Found compaction in a stripe with end key [" + Bytes.toString(si.getEndRow(bqIndex))
      + "], with " + filesToCompact.size() + " files of total size " + bqTotalSize + splitString);
  // See if we can drop deletes.
  StripeCompactionRequest req;
  if (includeL0) {
    assert hasAllFiles;
    List<HStoreFile> l0Files = si.getLevel0Files();
    LOG.debug("Adding " + l0Files.size() + " files to compaction to be able to drop deletes");
    ConcatenatedLists<HStoreFile> sfs = new ConcatenatedLists<>();
    sfs.addSublist(filesToCompact);
    sfs.addSublist(l0Files);
    req = new BoundaryStripeCompactionRequest(sfs, si.getStripeBoundaries());
  } else {
    req = new SplitStripeCompactionRequest(filesToCompact, si.getStartRow(bqIndex),
        si.getEndRow(bqIndex), targetCount, targetKvs);
  }
  if (hasAllFiles && (canDropDeletesWithoutL0 || includeL0)) {
    req.setMajorRange(si.getStartRow(bqIndex), si.getEndRow(bqIndex));
  }
  req.getRequest().setOffPeak(isOffpeak);
  return req;
}
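The "best queue" (bq*) bookkeeping in the first loop follows a simple preference order: more files win, and ties go to the smaller total size. The sketch below restates that comparison outside of HBase; isBetter is a hypothetical helper, not an HBase method.

// Illustrative sketch (not HBase code) of the best-stripe comparison used above.
public class BestStripeSelectionSketch {
  static boolean isBetter(int candidateFiles, long candidateSize,
      Integer bestFiles, Long bestSize) {
    if (bestFiles == null) {
      return true;  // nothing selected yet
    }
    return candidateFiles > bestFiles
        || (candidateFiles == bestFiles && candidateSize < bestSize);
  }

  public static void main(String[] args) {
    System.out.println(isBetter(4, 900, null, null));  // true: first candidate
    System.out.println(isBetter(5, 1200, 4, 900));     // true: more files
    System.out.println(isBetter(4, 700, 4, 900));      // true: same files, smaller size
    System.out.println(isBetter(3, 100, 4, 900));      // false
  }
}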