Search in sources :

Example 16 with StoredTabletFile

use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

the class BasicCompactionStrategy method filterFiles.

/**
 * Narrows a compaction request to the files that pass the configured size filter.
 *
 * <p>When {@code filterSize} is {@code null} no filtering is configured and the request is
 * handed back untouched. Otherwise a copy of the request is created and its file set is
 * replaced by the files whose size is less than or equal to {@code filterSize}.
 *
 * @param mcr the request whose files are examined; it is not modified by this method
 * @return {@code mcr} itself when no filter is configured, otherwise a new request holding only
 *         the files that passed the filter
 */
private MajorCompactionRequest filterFiles(MajorCompactionRequest mcr) {
    if (filterSize == null) {
        // nothing to filter on, keep the request as it is
        return mcr;
    }
    Map<StoredTabletFile, DataFileValue> smallEnough = new HashMap<>();
    for (Map.Entry<StoredTabletFile, DataFileValue> candidate : mcr.getFiles().entrySet()) {
        DataFileValue sizeInfo = candidate.getValue();
        if (sizeInfo.getSize() <= filterSize) {
            smallEnough.put(candidate.getKey(), sizeInfo);
        }
    }
    // work on a copy so the caller's request keeps its full file set
    MajorCompactionRequest narrowed = new MajorCompactionRequest(mcr);
    narrowed.setFiles(smallEnough);
    return narrowed;
}
Also used : DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) HashMap(java.util.HashMap) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) MajorCompactionRequest(org.apache.accumulo.tserver.compaction.MajorCompactionRequest)

Example 17 with StoredTabletFile

use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

the class TooManyDeletesCompactionStrategy method gatherInformation.

/**
 * Decides whether the files of the request should be compacted, based on the ratio of delete
 * entries to non-delete entries, and stores the decision in {@code shouldCompact}.
 *
 * <p>For every file the summaries produced by an option-less {@link DeletesSummarizer} are
 * requested. If exactly one summary comes back, its {@code TOTAL_STAT} and {@code DELETES_STAT}
 * values are added to the running totals. Otherwise the entry count recorded in the file's
 * {@link DataFileValue} is used as the total; a file reporting zero entries aborts the decision
 * with {@code shouldCompact = false} unless {@code proceed_bns} is set.
 *
 * @param request the compaction request supplying the files and their summaries
 * @throws IOException declared by the overridden method; propagated from the calls made here
 */
@Override
public void gatherInformation(MajorCompactionRequest request) throws IOException {
    super.gatherInformation(request);

    Predicate<SummarizerConfiguration> summarizerPredicate = cfg -> {
        boolean isDeletesSummarizer = cfg.getClassName().equals(DeletesSummarizer.class.getName());
        return isDeletesSummarizer && cfg.getOptions().isEmpty();
    };

    long total = 0;
    long deletes = 0;

    for (Entry<StoredTabletFile, DataFileValue> fileEntry : request.getFiles().entrySet()) {
        Collection<Summary> found = request.getSummaries(Collections.singleton(fileEntry.getKey()), summarizerPredicate);

        if (found.size() == 1) {
            Summary only = found.iterator().next();
            total += only.getStatistics().get(TOTAL_STAT);
            deletes += only.getStatistics().get(DELETES_STAT);
            continue;
        }

        long numEntries = fileEntry.getValue().getNumEntries();
        if (numEntries == 0 && !proceed_bns) {
            shouldCompact = false;
            return;
        }
        // no summary data so use Accumulo's estimate of total entries in file
        total += numEntries;
    }

    long nonDeletes = total - deletes;
    if (nonDeletes < 0) {
        // a negative count is clear evidence that the estimates are off, so do not compact
        shouldCompact = false;
        return;
    }
    double ratio = deletes / (double) nonDeletes;
    shouldCompact = ratio >= threshold;
}
Also used : TOTAL_STAT(org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.TOTAL_STAT) Summary(org.apache.accumulo.core.client.summary.Summary) CompactionPlan(org.apache.accumulo.tserver.compaction.CompactionPlan) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Predicate(java.util.function.Predicate) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Collection(java.util.Collection) MajorCompactionRequest(org.apache.accumulo.tserver.compaction.MajorCompactionRequest) IOException(java.io.IOException) DeletesSummarizer(org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer) DefaultCompactionStrategy(org.apache.accumulo.tserver.compaction.DefaultCompactionStrategy) WriterOptions(org.apache.accumulo.core.client.rfile.RFile.WriterOptions) DELETES_STAT(org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.DELETES_STAT) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) Map(java.util.Map) Entry(java.util.Map.Entry) TooManyDeletesSelector(org.apache.accumulo.core.client.admin.compaction.TooManyDeletesSelector) Collections(java.util.Collections) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Summary(org.apache.accumulo.core.client.summary.Summary) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration)

Example 18 with StoredTabletFile

use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

the class MetadataTableUtil method finishSplit.

/**
 * Builds and submits the metadata mutation that completes a split for the given metadata row.
 *
 * <p>The mutation deletes the split-ratio, old-prev-row and chopped columns, writes one data file
 * entry per element of {@code datafileSizes}, and deletes the data file entries listed in
 * {@code highDatafilesToRemove}. It is then passed to {@code update} together with the extent
 * derived from {@code metadataEntry}.
 *
 * @param metadataEntry metadata row the mutation is applied to
 * @param datafileSizes files, with their encoded values, to record for the row
 * @param highDatafilesToRemove files whose entries are deleted from the row
 * @param context server context handed to {@code update}
 * @param zooLock lock handed to {@code update}
 */
public static void finishSplit(Text metadataEntry, Map<StoredTabletFile, DataFileValue> datafileSizes, List<StoredTabletFile> highDatafilesToRemove, final ServerContext context, ServiceLock zooLock) {
    final Mutation mutation = new Mutation(metadataEntry);

    // clear the markers left behind while the split was in progress
    TabletColumnFamily.SPLIT_RATIO_COLUMN.putDelete(mutation);
    TabletColumnFamily.OLD_PREV_ROW_COLUMN.putDelete(mutation);
    ChoppedColumnFamily.CHOPPED_COLUMN.putDelete(mutation);

    // record every file that belongs to this row
    datafileSizes.forEach((file, sizeInfo) -> {
        Value encoded = new Value(sizeInfo.encode());
        mutation.put(DataFileColumnFamily.NAME, file.getMetaInsertText(), encoded);
    });

    // drop the entries for files that are to be removed from this row
    highDatafilesToRemove.forEach(stale -> mutation.putDelete(DataFileColumnFamily.NAME, stale.getMetaUpdateDeleteText()));

    KeyExtent target = KeyExtent.fromMetaRow(metadataEntry);
    update(context, zooLock, mutation, target);
}
Also used : DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Value(org.apache.accumulo.core.data.Value) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) Mutation(org.apache.accumulo.core.data.Mutation)

Example 19 with StoredTabletFile

use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

the class MetadataTableUtil method getFileAndLogEntries.

/**
 * Reads the log entries and data files recorded in the metadata for a tablet.
 *
 * @param context server context used to read the tablet's metadata
 * @param extent extent of the tablet to look up
 * @return a pair holding a list of the tablet's log entries and a sorted map from each of its
 *         files to that file's {@link DataFileValue}
 * @throws IOException declared for callers; nothing in this method throws it directly
 * @throws RuntimeException if no metadata is found for {@code extent}
 */
public static Pair<List<LogEntry>, SortedMap<StoredTabletFile, DataFileValue>> getFileAndLogEntries(ServerContext context, KeyExtent extent) throws IOException {
    TabletMetadata metadata = context.getAmple().readTablet(extent, FILES, LOGS, PREV_ROW, DIR);
    if (metadata == null) {
        throw new RuntimeException("Tablet " + extent + " not found in metadata");
    }

    // copy into fresh collections owned by the caller
    List<LogEntry> logEntries = new ArrayList<>(metadata.getLogs());
    SortedMap<StoredTabletFile, DataFileValue> fileSizes = new TreeMap<>();
    fileSizes.putAll(metadata.getFilesMap());

    return new Pair<>(logEntries, fileSizes);
}
Also used : DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) ArrayList(java.util.ArrayList) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TreeMap(java.util.TreeMap) LogEntry(org.apache.accumulo.core.tabletserver.log.LogEntry) Pair(org.apache.accumulo.core.util.Pair)

Example 20 with StoredTabletFile

use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

the class Tablet method split.

/**
 * Splits this tablet into a low and a high tablet and records the result in the metadata table.
 *
 * <p>The method validates the requested split point, initiates a close of this tablet, picks a
 * split row (either the supplied one or one chosen by {@code findSplitRow}), completes the close,
 * distributes the tablet's data files between the two new extents, and writes the corresponding
 * metadata updates.
 *
 * @param sp the row to split on, or {@code null} to let the method choose a split row from the
 *        tablet's files
 * @return a map with one entry for the high extent and one for the low extent, each mapped to the
 *         {@code TabletData} of the new tablet; {@code null} if the close could not be initiated
 *         or no split row could be determined
 * @throws IllegalArgumentException if {@code sp} equals the end row of this tablet's extent
 * @throws IOException if {@code sp} is longer than the table's
 *         {@code Property.TABLE_MAX_END_ROW_SIZE} setting
 * @throws RuntimeException if this tablet is the root tablet
 */
public TreeMap<KeyExtent, TabletData> split(byte[] sp) throws IOException {
    // splitting on the existing end row is rejected up front
    if (sp != null && extent.endRow() != null && extent.endRow().equals(new Text(sp))) {
        throw new IllegalArgumentException("Attempting to split on EndRow " + extent.endRow() + " for " + extent);
    }
    // refuse split points that exceed the configured maximum end row size
    if (sp != null && sp.length > tableConfiguration.getAsBytes(Property.TABLE_MAX_END_ROW_SIZE)) {
        String msg = "Cannot split tablet " + extent + ", selected split point too long.  Length :  " + sp.length;
        log.warn(msg);
        throw new IOException(msg);
    }
    // the root tablet is never split
    if (extent.isRootTablet()) {
        String msg = "Cannot split root tablet";
        log.warn(msg);
        throw new RuntimeException(msg);
    }
    try {
        initiateClose(true);
    } catch (IllegalStateException ise) {
        // the close could not be started, so the split is skipped rather than failed
        log.debug("File {} not splitting : {}", extent, ise.getMessage());
        return null;
    }
    // obtain this info outside of synch block since it will involve opening
    // the map files... it is ok if the set of map files changes, because
    // this info is used for optimization... it is ok if map files are missing
    // from the set... can still query and insert into the tablet while this
    // map file operation is happening
    Map<TabletFile, FileUtil.FileInfo> firstAndLastRows = FileUtil.tryToGetFirstAndLastRows(context, getDatafileManager().getFiles());
    synchronized (this) {
        // java needs tuples ...
        TreeMap<KeyExtent, TabletData> newTablets = new TreeMap<>();
        // start of the interval reported in the "offline split time" debug message below
        long t1 = System.currentTimeMillis();
        // choose a split point
        SplitRowSpec splitPoint;
        if (sp == null) {
            splitPoint = findSplitRow(getDatafileManager().getFiles());
        } else {
            // caller supplied the row; only the split ratio has to be estimated
            Text tsp = new Text(sp);
            splitPoint = new SplitRowSpec(FileUtil.estimatePercentageLTE(context, chooseTabletDir(), extent.prevEndRow(), extent.endRow(), getDatafileManager().getFiles(), tsp), tsp);
        }
        if (splitPoint == null || splitPoint.row == null) {
            log.info("had to abort split because splitRow was null");
            // put the close state back to OPEN since the split is abandoned
            closeState = CloseState.OPEN;
            return null;
        }
        closeState = CloseState.CLOSING;
        completeClose(true, false);
        Text midRow = splitPoint.row;
        double splitRatio = splitPoint.splitRatio;
        // low covers (prevEndRow, midRow], high covers (midRow, endRow] of the original extent
        KeyExtent low = new KeyExtent(extent.tableId(), midRow, extent.prevEndRow());
        KeyExtent high = new KeyExtent(extent.tableId(), extent.endRow(), midRow);
        String lowDirectoryName = createTabletDirectoryName(context, midRow);
        // write new tablet information to MetadataTable
        SortedMap<StoredTabletFile, DataFileValue> lowDatafileSizes = new TreeMap<>();
        SortedMap<StoredTabletFile, DataFileValue> highDatafileSizes = new TreeMap<>();
        List<StoredTabletFile> highDatafilesToRemove = new ArrayList<>();
        // fills the three collections above from the tablet's current data file sizes
        MetadataTableUtil.splitDatafiles(midRow, splitRatio, firstAndLastRows, getDatafileManager().getDatafileSizes(), lowDatafileSizes, highDatafileSizes, highDatafilesToRemove);
        log.debug("Files for low split {} {}", low, lowDatafileSizes.keySet());
        log.debug("Files for high split {} {}", high, highDatafileSizes.keySet());
        MetadataTime time = tabletTime.getMetadataTime();
        // collect the ids of external compactions known to this tablet; they are passed to splitTablet
        HashSet<ExternalCompactionId> ecids = new HashSet<>();
        compactable.getExternalCompactionIds(ecids::add);
        // metadata updates are issued in this order: splitTablet, addNewTablet (low), finishSplit (high)
        MetadataTableUtil.splitTablet(high, extent.prevEndRow(), splitRatio, getTabletServer().getContext(), getTabletServer().getLock(), ecids);
        ManagerMetadataUtil.addNewTablet(getTabletServer().getContext(), low, lowDirectoryName, getTabletServer().getTabletSession(), lowDatafileSizes, bulkImported, time, lastFlushID, lastCompactID, getTabletServer().getLock());
        MetadataTableUtil.finishSplit(high, highDatafileSizes, highDatafilesToRemove, getTabletServer().getContext(), getTabletServer().getLock());
        TabletLogger.split(extent, low, high, getTabletServer().getTabletSession());
        // the high tablet keeps this tablet's directory name, the low tablet gets the newly created one
        newTablets.put(high, new TabletData(dirName, highDatafileSizes, time, lastFlushID, lastCompactID, lastLocation, bulkImported));
        newTablets.put(low, new TabletData(lowDirectoryName, lowDatafileSizes, time, lastFlushID, lastCompactID, lastLocation, bulkImported));
        long t2 = System.currentTimeMillis();
        log.debug(String.format("offline split time : %6.2f secs", (t2 - t1) / 1000.0));
        closeState = CloseState.COMPLETE;
        return newTablets;
    }
}
Also used : ExternalCompactionId(org.apache.accumulo.core.metadata.schema.ExternalCompactionId) ArrayList(java.util.ArrayList) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) MapFileInfo(org.apache.accumulo.core.dataImpl.thrift.MapFileInfo) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile) HashSet(java.util.HashSet) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TreeMap(java.util.TreeMap) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) MetadataTime(org.apache.accumulo.core.metadata.schema.MetadataTime)

Aggregations

StoredTabletFile (org.apache.accumulo.core.metadata.StoredTabletFile)47 DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue)25 TabletFile (org.apache.accumulo.core.metadata.TabletFile)18 IOException (java.io.IOException)12 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)11 HashMap (java.util.HashMap)9 HashSet (java.util.HashSet)9 Key (org.apache.accumulo.core.data.Key)9 ArrayList (java.util.ArrayList)8 TreeMap (java.util.TreeMap)8 Value (org.apache.accumulo.core.data.Value)8 Path (org.apache.hadoop.fs.Path)7 Text (org.apache.hadoop.io.Text)7 Pair (org.apache.accumulo.core.util.Pair)6 MajorCompactionRequest (org.apache.accumulo.tserver.compaction.MajorCompactionRequest)6 Test (org.junit.Test)6 LogEntry (org.apache.accumulo.core.tabletserver.log.LogEntry)5 UncheckedIOException (java.io.UncheckedIOException)4 CompactionConfig (org.apache.accumulo.core.client.admin.CompactionConfig)4 TServerInstance (org.apache.accumulo.core.metadata.TServerInstance)4