use of org.apache.jackrabbit.oak.segment.file.tar.binaries.BinaryReferencesIndex in project jackrabbit-oak by apache.
the class TarReader method collectBlobReferences.
/**
 * Report the binary (BLOB) references recorded in the binary references
 * index of this TAR file.
 * <p>
 * For every record of the index, a {@code GCGeneration} is built from the
 * generation data stored with the record and handed to the user-provided
 * {@link Predicate}. If the predicate returns {@code true}, the record is
 * skipped. If the predicate returns {@code false}, the reference stored in
 * the record is passed to the user-provided {@link Consumer}.
 * <p>
 * If this TAR file has no binary references index, the method returns
 * without invoking the predicate or the consumer.
 *
 * @param collector      An instance of {@link Consumer}. It is invoked once
 *                       for every reference that is not skipped.
 * @param skipGeneration An instance of {@link Predicate}. It decides, per
 *                       generation, whether references are skipped.
 */
void collectBlobReferences(@Nonnull Consumer<String> collector, Predicate<GCGeneration> skipGeneration) {
    BinaryReferencesIndex index = getBinaryReferences();
    if (index != null) {
        index.forEach((generation, full, compacted, segmentId, blobId) -> {
            GCGeneration entryGeneration = newGCGeneration(generation, full, compacted);
            // Only references from generations that are not skipped are reported.
            if (!skipGeneration.apply(entryGeneration)) {
                collector.accept(blobId);
            }
        });
    }
}
use of org.apache.jackrabbit.oak.segment.file.tar.binaries.BinaryReferencesIndex in project jackrabbit-oak by apache.
the class TarReader method sweep.
/**
 * Try to remove every segment contained in a user-provided set.
 * <p>
 * The outcome of this method depends on the content of this TAR file.
 * <p>
 * First, if no entry of this TAR file would survive the clean up, i.e.
 * every entry of this TAR file (if any) is contained in {@code reclaim},
 * the method returns {@code null} and no new TAR file is written.
 * <p>
 * Second, if the surviving entries occupy at least 3/4 of the space taken
 * by all the entries (the reclaimable space is at most 1/4) and this TAR
 * file has a segment graph, the method returns this {@link TarReader}. A
 * TAR file without a segment graph is always rewritten, regardless of the
 * space savings.
 * <p>
 * Third, if this TAR file is in the highest generation possible ('z') and
 * thus a new generation for this TAR file can't be created, the method
 * returns this {@link TarReader}.
 * <p>
 * Fourth, if a new TAR file has been written but it can't be opened, the
 * method returns this {@link TarReader}.
 * <p>
 * If none of the above conditions apply, this method returns a new {@link
 * TarReader} instance that points to a TAR file that doesn't contain the
 * removed segments. The returned {@link TarReader} belongs to the next
 * generation of this {@link TarReader}. Only in this case, the {@code
 * reclaimed} set is updated to contain the identifiers of the segments
 * that were removed from this TAR file.
 *
 * @param reclaim   Set of segments to reclaim.
 * @param reclaimed Set of reclaimed segments. It will be updated only if
 *                  this TAR file is successfully rewritten.
 * @return Either this {@link TarReader}, or a new instance of {@link
 * TarReader}, or {@code null}.
 * @throws IOException If reading this TAR file or writing the new TAR file
 *                     fails.
 */
TarReader sweep(@Nonnull Set<UUID> reclaim, @Nonnull Set<UUID> reclaimed) throws IOException {
    String name = archive.getName();
    log.debug("Cleaning up {}", name);

    Set<UUID> cleaned = newHashSet();
    // Sizes are accumulated as long: with int accumulators the expression
    // 'beforeSize * 3' overflows for archives larger than ~715MB, which
    // makes the savings check below always succeed and skips the clean up.
    long afterSize = 0;
    long beforeSize = 0;
    int afterCount = 0;

    SegmentArchiveEntry[] entries = getEntries();
    for (int i = 0; i < entries.length; i++) {
        SegmentArchiveEntry entry = entries[i];
        long entrySize = archive.getEntrySize(entry.getLength());
        beforeSize += entrySize;
        UUID id = new UUID(entry.getMsb(), entry.getLsb());
        if (reclaim.contains(id)) {
            cleaned.add(id);
            // A null slot marks an entry that must not be copied over.
            entries[i] = null;
        } else {
            afterSize += entrySize;
            afterCount += 1;
        }
    }

    if (afterCount == 0) {
        log.debug("None of the entries of {} are referenceable.", name);
        return null;
    }

    if (afterSize >= beforeSize * 3 / 4 && hasGraph()) {
        // the space savings are not worth it at less than 25%,
        // unless this tar file lacks a pre-compiled segment graph
        // in which case we'll always generate a new tar file with
        // the graph to speed up future garbage collection runs.
        log.debug("Not enough space savings. ({}/{}). Skipping clean up of {}", archive.length() - afterSize, archive.length(), name);
        return this;
    }

    if (!hasGraph()) {
        log.warn("Recovering {}, which is missing its graph.", name);
    }

    // The generation is the single character right before the ".tar"
    // suffix of the archive name.
    int pos = name.length() - "a.tar".length();
    char generation = name.charAt(pos);
    if (generation == 'z') {
        log.debug("No garbage collection after reaching generation z: {}", name);
        return this;
    }

    String newFile = name.substring(0, pos) + (char) (generation + 1) + ".tar";
    log.debug("Writing new generation {}", newFile);

    TarWriter writer = new TarWriter(archiveManager, newFile);

    // Copy every surviving entry to the new generation.
    for (SegmentArchiveEntry entry : entries) {
        if (entry != null) {
            long msb = entry.getMsb();
            long lsb = entry.getLsb();
            int size = entry.getLength();
            GCGeneration gen = GCGeneration.newGCGeneration(entry);
            byte[] data = new byte[size];
            archive.readSegment(msb, lsb).get(data);
            writer.writeEntry(msb, lsb, data, 0, size, gen);
        }
    }

    // Reconstruct the graph index for non-cleaned segments.
    Map<UUID, List<UUID>> graph = getGraph();
    for (Entry<UUID, List<UUID>> e : graph.entrySet()) {
        if (cleaned.contains(e.getKey())) {
            continue;
        }
        // Collecting into a set drops duplicated edges as well as edges
        // pointing to cleaned segments.
        Set<UUID> vertices = newHashSet();
        for (UUID vertex : e.getValue()) {
            if (cleaned.contains(vertex)) {
                continue;
            }
            vertices.add(vertex);
        }
        for (UUID vertex : vertices) {
            writer.addGraphEdge(e.getKey(), vertex);
        }
    }

    // Reconstruct the binary reference index for non-cleaned segments.
    BinaryReferencesIndex references = getBinaryReferences();
    if (references != null) {
        references.forEach((gen, full, compacted, id, reference) -> {
            if (cleaned.contains(id)) {
                return;
            }
            writer.addBinaryReference(newGCGeneration(gen, full, compacted), id, reference);
        });
    }

    writer.close();

    TarReader reader = openFirstFileWithValidIndex(singletonList(newFile), archiveManager);
    if (reader != null) {
        reclaimed.addAll(cleaned);
        return reader;
    } else {
        // Report the file that could not be opened, i.e. the new
        // generation, not the file this reader is backed by.
        log.warn("Failed to open cleaned up tar file {}", newFile);
        return this;
    }
}
Aggregations