Usage of org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.TOTAL_STAT in the Apache Accumulo project.
Class TooManyDeletesCompactionStrategy, method gatherInformation.
/**
 * Collects per-file entry and delete counts for the files in {@code request} and sets
 * {@code shouldCompact} according to the ratio of deletes to non-deletes.
 *
 * <p>For each file, summaries are requested that were produced by {@link DeletesSummarizer}
 * configured with no options. When exactly one such summary is returned, its
 * {@code TOTAL_STAT} and {@code DELETES_STAT} values are added to the running totals. Otherwise
 * the file's {@code getNumEntries()} value is added to the total only; if that value is zero and
 * {@code proceed_bns} is false, {@code shouldCompact} is set to false and the method returns
 * immediately.
 *
 * @param request the compaction request whose files and summaries are inspected
 * @throws IOException declared by the overridden method; may be raised by the superclass call or
 *         while fetching summaries
 */
@Override
public void gatherInformation(MajorCompactionRequest request) throws IOException {
  super.gatherInformation(request);

  // Accept only summaries generated by an option-less DeletesSummarizer.
  final String deletesSummarizerName = DeletesSummarizer.class.getName();
  Predicate<SummarizerConfiguration> plainDeletesSummarizer =
      sc -> sc.getClassName().equals(deletesSummarizerName) && sc.getOptions().isEmpty();

  long entryCount = 0;
  long deleteCount = 0;

  for (Entry<FileRef, DataFileValue> fileEntry : request.getFiles().entrySet()) {
    Collection<Summary> found =
        request.getSummaries(Collections.singleton(fileEntry.getKey()), plainDeletesSummarizer);

    if (found.size() != 1) {
      // No usable summary for this file: fall back to the entry count recorded for the file.
      long recordedEntries = fileEntry.getValue().getNumEntries();
      if (recordedEntries == 0 && !proceed_bns) {
        // Nothing to go on for this file and proceeding is not enabled, so give up.
        shouldCompact = false;
        return;
      }
      entryCount += recordedEntries;
      continue;
    }

    Summary fileSummary = found.iterator().next();
    entryCount += fileSummary.getStatistics().get(TOTAL_STAT);
    deleteCount += fileSummary.getStatistics().get(DELETES_STAT);
  }

  long liveCount = entryCount - deleteCount;

  // A negative live count is clear evidence that the estimates are off, so never compact then.
  // With liveCount == 0 the floating-point division yields NaN (no deletes, comparison is false)
  // or +Infinity (some deletes).
  shouldCompact = liveCount >= 0 && (deleteCount / (double) liveCount) >= threshold;
}
Aggregations