Search in sources :

Example 1 with BinaryReferencesIndex

use of org.apache.jackrabbit.oak.segment.file.tar.binaries.BinaryReferencesIndex in project jackrabbit-oak by apache.

the class TarReader method collectBlobReferences.

/**
 * Report the binary (BLOB) references recorded in the binary references
 * index of this TAR file to a callback.
 * <p>
 * For every entry of the index, a {@link GCGeneration} is built from the
 * generation data stored with that entry and handed to the user-provided
 * {@link Predicate}. If the predicate returns {@code true}, the entry is
 * skipped. If it returns {@code false}, the reference of that entry is
 * passed to the user-provided {@link Consumer}.
 * <p>
 * If no binary references index is available for this TAR file, the
 * method returns without invoking the callback.
 *
 * @param collector      The {@link Consumer} invoked once for every
 *                       reference that is not skipped.
 * @param skipGeneration The {@link Predicate} deciding whether entries of
 *                       a given generation are skipped.
 */
void collectBlobReferences(@Nonnull Consumer<String> collector, Predicate<GCGeneration> skipGeneration) {
    BinaryReferencesIndex index = getBinaryReferences();
    if (index != null) {
        index.forEach((generation, full, compacted, segmentId, binaryReference) -> {
            GCGeneration entryGeneration = newGCGeneration(generation, full, compacted);
            // Only entries whose generation is not skipped are reported.
            if (!skipGeneration.apply(entryGeneration)) {
                collector.accept(binaryReference);
            }
        });
    }
}
Also used : BinaryReferencesIndex(org.apache.jackrabbit.oak.segment.file.tar.binaries.BinaryReferencesIndex)

Example 2 with BinaryReferencesIndex

use of org.apache.jackrabbit.oak.segment.file.tar.binaries.BinaryReferencesIndex in project jackrabbit-oak by apache.

the class TarReader method sweep.

/**
 * Try to remove every segment contained in a user-provided set.
 * <p>
 * This method might refuse to rewrite this TAR file under the following
 * circumstances.
 * <p>
 * First, if every entry of this TAR file is contained in the set of
 * segments to reclaim, so that no entry would survive the clean up. In
 * this case, no new TAR file is written and the method returns {@code
 * null}.
 * <p>
 * Second, if the entries that would survive still account for at least 3/4
 * of the current total entry size (i.e. the reclaimable space is not worth
 * the rewrite) and this TAR file already has a segment graph. In this
 * case, this method returns this {@link TarReader}. A TAR file without a
 * graph is rewritten regardless of the space savings.
 * <p>
 * Third, if this TAR file is in the highest generation possible ('z') and
 * thus a new generation for this TAR file can't be created. In this case,
 * the method returns this {@link TarReader}.
 * <p>
 * Fourth, if a new TAR file has been created but it can't be opened. In
 * this case, this method returns this {@link TarReader}.
 * <p>
 * If none of the above conditions apply, this method returns a new {@link
 * TarReader} instance that points to a TAR file that doesn't contain the
 * removed segments. The returned {@link TarReader} will belong to the next
 * generation of this {@link TarReader}. In this case, the {@code reclaimed}
 * set will be updated to contain the identifiers of the segments that were
 * removed from this TAR file.
 *
 * @param reclaim   Set of segments to reclaim.
 * @param reclaimed Set of reclaimed segments. It will be updated only if
 *                  this TAR file is rewritten and the new file can be
 *                  opened.
 * @return Either this {@link TarReader}, or a new instance of {@link
 * TarReader}, or {@code null}.
 * @throws IOException If reading from this TAR file or writing the new TAR
 *                     file fails.
 */
TarReader sweep(@Nonnull Set<UUID> reclaim, @Nonnull Set<UUID> reclaimed) throws IOException {
    String name = archive.getName();
    log.debug("Cleaning up {}", name);
    Set<UUID> cleaned = newHashSet();
    // Sizes are accumulated as long so that neither the sums nor the
    // "beforeSize * 3 / 4" threshold below can overflow for large archives.
    long afterSize = 0;
    long beforeSize = 0;
    int afterCount = 0;
    SegmentArchiveEntry[] entries = getEntries();
    for (int i = 0; i < entries.length; i++) {
        SegmentArchiveEntry entry = entries[i];
        beforeSize += archive.getEntrySize(entry.getLength());
        UUID id = new UUID(entry.getMsb(), entry.getLsb());
        if (reclaim.contains(id)) {
            cleaned.add(id);
            // A null slot marks an entry that must not be copied over.
            entries[i] = null;
        } else {
            afterSize += archive.getEntrySize(entry.getLength());
            afterCount += 1;
        }
    }
    if (afterCount == 0) {
        log.debug("None of the entries of {} are referenceable.", name);
        return null;
    }
    if (afterSize >= beforeSize * 3 / 4 && hasGraph()) {
        // the space savings are not worth it at less than 25%,
        // unless this tar file lacks a pre-compiled segment graph
        // in which case we'll always generate a new tar file with
        // the graph to speed up future garbage collection runs.
        log.debug("Not enough space savings. ({}/{}). Skipping clean up of {}", archive.length() - afterSize, archive.length(), name);
        return this;
    }
    if (!hasGraph()) {
        log.warn("Recovering {}, which is missing its graph.", name);
    }
    // The generation is the single character right before the ".tar"
    // extension of the file name.
    int pos = name.length() - "a.tar".length();
    char generation = name.charAt(pos);
    if (generation == 'z') {
        log.debug("No garbage collection after reaching generation z: {}", name);
        return this;
    }
    String newFile = name.substring(0, pos) + (char) (generation + 1) + ".tar";
    log.debug("Writing new generation {}", newFile);
    // NOTE(review): the writer is not closed if copying fails part-way.
    // Confirm whether closing would finalize a partial next-generation file
    // before wrapping this in try/finally.
    TarWriter writer = new TarWriter(archiveManager, newFile);
    for (SegmentArchiveEntry entry : entries) {
        if (entry != null) {
            long msb = entry.getMsb();
            long lsb = entry.getLsb();
            int size = entry.getLength();
            GCGeneration gen = GCGeneration.newGCGeneration(entry);
            byte[] data = new byte[size];
            archive.readSegment(msb, lsb).get(data);
            writer.writeEntry(msb, lsb, data, 0, size, gen);
        }
    }
    // Reconstruct the graph index for non-cleaned segments. The graph may
    // be unavailable (this method explicitly proceeds when hasGraph() is
    // false), so guard against null like the binary references below.
    Map<UUID, List<UUID>> graph = getGraph();
    if (graph != null) {
        for (Entry<UUID, List<UUID>> e : graph.entrySet()) {
            if (cleaned.contains(e.getKey())) {
                continue;
            }
            Set<UUID> vertices = newHashSet();
            for (UUID vertex : e.getValue()) {
                if (cleaned.contains(vertex)) {
                    continue;
                }
                vertices.add(vertex);
            }
            for (UUID vertex : vertices) {
                writer.addGraphEdge(e.getKey(), vertex);
            }
        }
    }
    // Reconstruct the binary reference index for non-cleaned segments.
    BinaryReferencesIndex references = getBinaryReferences();
    if (references != null) {
        references.forEach((gen, full, compacted, id, reference) -> {
            if (cleaned.contains(id)) {
                return;
            }
            writer.addBinaryReference(newGCGeneration(gen, full, compacted), id, reference);
        });
    }
    writer.close();
    TarReader reader = openFirstFileWithValidIndex(singletonList(newFile), archiveManager);
    if (reader != null) {
        reclaimed.addAll(cleaned);
        return reader;
    } else {
        // Report the file that actually failed to open, i.e. the newly
        // written generation, not the file being cleaned up.
        log.warn("Failed to open cleaned up tar file {}", newFile);
        return this;
    }
}
Also used : BinaryReferencesIndex(org.apache.jackrabbit.oak.segment.file.tar.binaries.BinaryReferencesIndex) SegmentArchiveEntry(org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry) Collections.singletonList(java.util.Collections.singletonList) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) UUID(java.util.UUID) GCGeneration.newGCGeneration(org.apache.jackrabbit.oak.segment.file.tar.GCGeneration.newGCGeneration)

Aggregations

BinaryReferencesIndex (org.apache.jackrabbit.oak.segment.file.tar.binaries.BinaryReferencesIndex)2 Lists.newArrayList (com.google.common.collect.Lists.newArrayList)1 Collections.singletonList (java.util.Collections.singletonList)1 List (java.util.List)1 UUID (java.util.UUID)1 GCGeneration.newGCGeneration (org.apache.jackrabbit.oak.segment.file.tar.GCGeneration.newGCGeneration)1 SegmentArchiveEntry (org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry)1