Search in sources :

Example 1 with DELETES_STAT

Use of org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.DELETES_STAT in the Apache Accumulo project.

In class TooManyDeletesCompactionStrategy, method gatherInformation.

/**
 * Gathers per-file entry and delete counts and decides whether a compaction should run.
 *
 * <p>For every file in the request, the summary produced by an option-less
 * {@link DeletesSummarizer} is looked up. When exactly one such summary exists, its
 * {@code TOTAL_STAT} and {@code DELETES_STAT} values are added to the running totals.
 * Otherwise the file's recorded entry count is added to the total only; if that count is
 * zero and the {@code proceed_bns} flag (declared outside this method) is not set, the
 * method gives up immediately with {@code shouldCompact = false}.
 *
 * <p>After all files are processed, {@code shouldCompact} is set to whether the ratio of
 * deletes to non-deletes is at least {@code threshold}.
 *
 * @param request the major compaction request supplying the files and their summaries
 * @throws IOException declared by the overridden method; may be propagated from the calls
 *         made on {@code request} or the superclass
 */
@Override
public void gatherInformation(MajorCompactionRequest request) throws IOException {
    super.gatherInformation(request);

    // Accept only summaries produced by DeletesSummarizer configured without any options.
    Predicate<SummarizerConfiguration> plainDeletesSummarizer = cfg -> cfg.getClassName().equals(DeletesSummarizer.class.getName()) && cfg.getOptions().isEmpty();

    long entryCount = 0;
    long deleteCount = 0;

    for (Entry<FileRef, DataFileValue> fileEntry : request.getFiles().entrySet()) {
        Collection<Summary> matching = request.getSummaries(Collections.singleton(fileEntry.getKey()), plainDeletesSummarizer);

        if (matching.size() != 1) {
            long estimatedEntries = fileEntry.getValue().getNumEntries();
            if (estimatedEntries == 0 && !proceed_bns) {
                // No summary and no entry estimate for this file: do not compact.
                shouldCompact = false;
                return;
            }
            // No summary data, so fall back to the entry count recorded for the file.
            entryCount += estimatedEntries;
            continue;
        }

        Summary fileSummary = matching.iterator().next();
        entryCount += fileSummary.getStatistics().get(TOTAL_STAT);
        deleteCount += fileSummary.getStatistics().get(DELETES_STAT);
    }

    long nonDeleteCount = entryCount - deleteCount;
    if (nonDeleteCount < 0) {
        // More deletes than total entries is clear evidence that the estimates are off.
        shouldCompact = false;
        return;
    }

    // Floating-point division: a zero denominator yields Infinity (or NaN when there are
    // also no deletes) rather than throwing, and NaN never satisfies the comparison.
    double deleteRatio = deleteCount / (double) nonDeleteCount;
    shouldCompact = deleteRatio >= threshold;
}
Also used : TOTAL_STAT(org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.TOTAL_STAT) Summary(org.apache.accumulo.core.client.summary.Summary) CompactionPlan(org.apache.accumulo.tserver.compaction.CompactionPlan) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Logger(org.slf4j.Logger) Predicate(java.util.function.Predicate) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Collection(java.util.Collection) MajorCompactionRequest(org.apache.accumulo.tserver.compaction.MajorCompactionRequest) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) DeletesSummarizer(org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer) DefaultCompactionStrategy(org.apache.accumulo.tserver.compaction.DefaultCompactionStrategy) WriterOptions(org.apache.accumulo.core.client.rfile.RFile.WriterOptions) DELETES_STAT(org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.DELETES_STAT) Map(java.util.Map) Entry(java.util.Map.Entry) AccumuloFileOutputFormat(org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat) FileRef(org.apache.accumulo.server.fs.FileRef) Collections(java.util.Collections) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) FileRef(org.apache.accumulo.server.fs.FileRef) Summary(org.apache.accumulo.core.client.summary.Summary) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration)

Aggregations

IOException (java.io.IOException)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 Predicate (java.util.function.Predicate)1 AccumuloFileOutputFormat (org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat)1 WriterOptions (org.apache.accumulo.core.client.rfile.RFile.WriterOptions)1 SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)1 Summary (org.apache.accumulo.core.client.summary.Summary)1 DeletesSummarizer (org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer)1 DELETES_STAT (org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.DELETES_STAT)1 TOTAL_STAT (org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.TOTAL_STAT)1 DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue)1 FileRef (org.apache.accumulo.server.fs.FileRef)1 CompactionPlan (org.apache.accumulo.tserver.compaction.CompactionPlan)1 DefaultCompactionStrategy (org.apache.accumulo.tserver.compaction.DefaultCompactionStrategy)1 MajorCompactionRequest (org.apache.accumulo.tserver.compaction.MajorCompactionRequest)1 Logger (org.slf4j.Logger)1 LoggerFactory (org.slf4j.LoggerFactory)1