use of org.apache.cassandra.dht.Range in project cassandra by apache.
the class CompactionManager method createMerkleTrees.
private static MerkleTrees createMerkleTrees(Iterable<SSTableReader> sstables, Collection<Range<Token>> ranges, ColumnFamilyStore cfs) {
    MerkleTrees tree = new MerkleTrees(cfs.getPartitioner());
    long allPartitions = 0;
    Map<Range<Token>, Long> rangePartitionCounts = Maps.newHashMapWithExpectedSize(ranges.size());
    for (Range<Token> range : ranges) {
        long numPartitions = 0;
        for (SSTableReader sstable : sstables)
            numPartitions += sstable.estimatedKeysForRanges(Collections.singleton(range));
        rangePartitionCounts.put(range, numPartitions);
        allPartitions += numPartitions;
    }
    for (Range<Token> range : ranges) {
        long numPartitions = rangePartitionCounts.get(range);
        double rangeOwningRatio = allPartitions > 0 ? (double) numPartitions / allPartitions : 0;
        // determine max tree depth proportional to range size to avoid blowing up memory with multiple trees,
        // capping at 20 to prevent overly large trees (CASSANDRA-11390)
        int maxDepth = rangeOwningRatio > 0 ? (int) Math.floor(20 - Math.log(1 / rangeOwningRatio) / Math.log(2)) : 0;
        // determine tree depth from the number of partitions, capping at the max tree depth (CASSANDRA-5263)
        int depth = numPartitions > 0 ? (int) Math.min(Math.ceil(Math.log(numPartitions) / Math.log(2)), maxDepth) : 0;
        tree.addMerkleTree((int) Math.pow(2, depth), range);
    }
    if (logger.isDebugEnabled()) {
        // MerkleTrees serialization may take time
        logger.debug("Created {} merkle trees with merkle trees size {}, {} partitions, {} bytes", tree.ranges().size(), tree.size(), allPartitions, MerkleTrees.serializer.serializedSize(tree, 0));
    }
    return tree;
}
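The two depth caps above are easy to misread, so here is a minimal standalone sketch of just the arithmetic (the class name and the sample counts are illustrative, not part of Cassandra): maxDepth loses one level for each halving of the range's share of all partitions, and the final depth is further bounded by log2 of the range's own partition count.

// Standalone sketch of the depth caps used in createMerkleTrees.
// Hypothetical class; only the two formulas come from the snippet above.
public class MerkleDepthSketch {
    // cap from CASSANDRA-11390: a range owning a small fraction of the
    // partitions gets a proportionally shallower tree, never deeper than 20
    static int maxDepth(long numPartitions, long allPartitions) {
        if (numPartitions <= 0 || allPartitions <= 0)
            return 0;
        double ratio = (double) numPartitions / allPartitions;
        return (int) Math.floor(20 - Math.log(1 / ratio) / Math.log(2));
    }

    // cap from CASSANDRA-5263: no point in a tree with more leaves than partitions
    static int depth(long numPartitions, int maxDepth) {
        if (numPartitions <= 0)
            return 0;
        return (int) Math.min(Math.ceil(Math.log(numPartitions) / Math.log(2)), maxDepth);
    }

    public static void main(String[] args) {
        // a range owning 1/8 of 1M partitions: maxDepth = 20 - log2(8) = 17,
        // and ceil(log2(125000)) = 17, so the tree gets 2^17 leaves
        long all = 1_000_000;
        long mine = all / 8;
        int md = maxDepth(mine, all);
        int d = depth(mine, md);
        System.out.printf("maxDepth=%d depth=%d leaves=%d%n", md, d, (int) Math.pow(2, d));
    }
}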
use of org.apache.cassandra.dht.Range in project cassandra by apache.
the class CompactionManager method antiCompactGroup.
private int antiCompactGroup(ColumnFamilyStore cfs, Collection<Range<Token>> ranges, LifecycleTransaction anticompactionGroup, long repairedAt, UUID pendingRepair) {
    long groupMaxDataAge = -1;
    for (Iterator<SSTableReader> i = anticompactionGroup.originals().iterator(); i.hasNext(); ) {
        SSTableReader sstable = i.next();
        if (groupMaxDataAge < sstable.maxDataAge)
            groupMaxDataAge = sstable.maxDataAge;
    }
    if (anticompactionGroup.originals().size() == 0) {
        logger.info("No valid anticompactions for this group; all sstables were compacted and are no longer available");
        return 0;
    }
    logger.info("Anticompacting {}", anticompactionGroup);
    Set<SSTableReader> sstableAsSet = anticompactionGroup.originals();
    File destination = cfs.getDirectories().getWriteableLocationAsFile(cfs.getExpectedCompactedFileSize(sstableAsSet, OperationType.ANTICOMPACTION));
    long repairedKeyCount = 0;
    long unrepairedKeyCount = 0;
    int nowInSec = FBUtilities.nowInSeconds();
    CompactionStrategyManager strategy = cfs.getCompactionStrategyManager();
    try (SSTableRewriter repairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
         SSTableRewriter unRepairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
         AbstractCompactionStrategy.ScannerList scanners = strategy.getScanners(anticompactionGroup.originals());
         CompactionController controller = new CompactionController(cfs, sstableAsSet, getDefaultGcBefore(cfs, nowInSec));
         CompactionIterator ci = new CompactionIterator(OperationType.ANTICOMPACTION, scanners.scanners, controller, nowInSec, UUIDGen.getTimeUUID(), metrics)) {
        int expectedBloomFilterSize = Math.max(cfs.metadata().params.minIndexInterval, (int) (SSTableReader.getApproximateKeyCount(sstableAsSet)));
        repairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, repairedAt, pendingRepair, sstableAsSet, anticompactionGroup));
        unRepairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, ActiveRepairService.UNREPAIRED_SSTABLE, null, sstableAsSet, anticompactionGroup));
        Range.OrderedRangeContainmentChecker containmentChecker = new Range.OrderedRangeContainmentChecker(ranges);
        while (ci.hasNext()) {
            try (UnfilteredRowIterator partition = ci.next()) {
                // if the partition's token falls in a repaired range, write it to the new repaired sstable
                if (containmentChecker.contains(partition.partitionKey().getToken())) {
                    repairedSSTableWriter.append(partition);
                    repairedKeyCount++;
                } else {
                    // otherwise write it to the new 'non-repaired' sstable
                    unRepairedSSTableWriter.append(partition);
                    unrepairedKeyCount++;
                }
            }
        }
        List<SSTableReader> anticompactedSSTables = new ArrayList<>();
        // since both writers are operating over the same Transaction, we cannot use the convenience Transactional.finish() method,
        // as on the second finish() we would prepareToCommit() on a Transaction that has already been committed, which is forbidden by the API
        // (since it indicates misuse). We call permitRedundantTransitions so that calls that transition to a state already occupied are permitted.
        anticompactionGroup.permitRedundantTransitions();
        repairedSSTableWriter.setRepairedAt(repairedAt).prepareToCommit();
        unRepairedSSTableWriter.prepareToCommit();
        anticompactedSSTables.addAll(repairedSSTableWriter.finished());
        anticompactedSSTables.addAll(unRepairedSSTableWriter.finished());
        repairedSSTableWriter.commit();
        unRepairedSSTableWriter.commit();
        logger.trace("Repaired {} keys out of {} for {}/{} in {}", repairedKeyCount, repairedKeyCount + unrepairedKeyCount, cfs.keyspace.getName(), cfs.getTableName(), anticompactionGroup);
        return anticompactedSSTables.size();
    } catch (Throwable e) {
        JVMStabilityInspector.inspectThrowable(e);
        logger.error("Error anticompacting " + anticompactionGroup, e);
    }
    return 0;
}
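The containment check is the heart of anticompaction: each partition goes to the repaired or unrepaired writer based solely on whether its token falls in one of the given ranges. Range.OrderedRangeContainmentChecker can walk the sorted ranges in a single pass because CompactionIterator emits partitions in token order; the sketch below settles for a plain linear scan over a simplified (left, right] range of long tokens, with no wrap-around. All names in it are hypothetical stand-ins for Cassandra's types.

import java.util.ArrayList;
import java.util.List;

public class AnticompactionSplitSketch {
    // simplified stand-in for Range<Token>: (left, right], ignoring wrap-around
    record LongRange(long left, long right) {
        boolean contains(long token) { return token > left && token <= right; }
    }

    public static void main(String[] args) {
        List<LongRange> repairedRanges = List.of(new LongRange(0, 100), new LongRange(500, 600));
        long[] partitionTokens = { 50, 150, 550, 700 };
        List<Long> repaired = new ArrayList<>();
        List<Long> unrepaired = new ArrayList<>();
        for (long token : partitionTokens) {
            // mirrors containmentChecker.contains(...) in antiCompactGroup:
            // tokens in a repaired range feed one writer, everything else the other
            boolean inRepairedRange = repairedRanges.stream().anyMatch(r -> r.contains(token));
            (inRepairedRange ? repaired : unrepaired).add(token);
        }
        System.out.println("repaired=" + repaired + " unrepaired=" + unrepaired);
        // prints: repaired=[50, 550] unrepaired=[150, 700]
    }
}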
use of org.apache.cassandra.dht.Range in project cassandra by apache.
the class ViewBuilder method run.
public void run() {
    logger.trace("Running view builder for {}.{}", baseCfs.metadata.keyspace, view.name);
    UUID localHostId = SystemKeyspace.getLocalHostId();
    String ksname = baseCfs.metadata.keyspace, viewName = view.name;
    if (SystemKeyspace.isViewBuilt(ksname, viewName)) {
        if (!SystemKeyspace.isViewStatusReplicated(ksname, viewName))
            updateDistributed(ksname, viewName, localHostId);
        return;
    }
    Iterable<Range<Token>> ranges = StorageService.instance.getLocalRanges(baseCfs.metadata.keyspace);
    final Pair<Integer, Token> buildStatus = SystemKeyspace.getViewBuildStatus(ksname, viewName);
    Token lastToken;
    Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>> function;
    if (buildStatus == null) {
        baseCfs.forceBlockingFlush();
        function = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL);
        int generation = Integer.MIN_VALUE;
        try (Refs<SSTableReader> temp = baseCfs.selectAndReference(function).refs) {
            for (SSTableReader reader : temp) {
                generation = Math.max(reader.descriptor.generation, generation);
            }
        }
        SystemKeyspace.beginViewBuild(ksname, viewName, generation);
        lastToken = null;
    } else {
        function = new Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>>() {
            @Nullable
            public Iterable<SSTableReader> apply(org.apache.cassandra.db.lifecycle.View view) {
                Iterable<SSTableReader> readers = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL).apply(view);
                if (readers != null)
                    return Iterables.filter(readers, ssTableReader -> ssTableReader.descriptor.generation <= buildStatus.left);
                return null;
            }
        };
        lastToken = buildStatus.right;
    }
    prevToken = lastToken;
    try (Refs<SSTableReader> sstables = baseCfs.selectAndReference(function).refs;
         ReducingKeyIterator iter = new ReducingKeyIterator(sstables)) {
        SystemDistributedKeyspace.startViewBuild(ksname, viewName, localHostId);
        while (!isStopped && iter.hasNext()) {
            DecoratedKey key = iter.next();
            Token token = key.getToken();
            if (lastToken == null || lastToken.compareTo(token) < 0) {
                for (Range<Token> range : ranges) {
                    if (range.contains(token)) {
                        buildKey(key);
                        if (prevToken == null || prevToken.compareTo(token) != 0) {
                            SystemKeyspace.updateViewBuildStatus(ksname, viewName, key.getToken());
                            prevToken = token;
                        }
                    }
                }
                lastToken = null;
            }
        }
        if (!isStopped) {
            SystemKeyspace.finishViewBuildStatus(ksname, viewName);
            updateDistributed(ksname, viewName, localHostId);
        }
    } catch (Exception e) {
        ScheduledExecutors.nonPeriodicTasks.schedule(() -> CompactionManager.instance.submitViewBuilder(this), 5, TimeUnit.MINUTES);
        logger.warn("Materialized view build failed to complete; retrying in 5 minutes", e);
    }
}
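The resume logic above is the subtle part: a saved buildStatus pins the sstable set to generations that existed when the build began, and keys at or below the saved token are skipped because they were already built. Below is a condensed sketch of just the token-skipping loop, with plain longs for tokens (names and values are illustrative, and the local-range filter is omitted). Since ReducingKeyIterator yields keys in token order, a single "skip while <= lastToken" test is equivalent to the nulling-out of lastToken in the original.

import java.util.List;

public class ViewBuildResumeSketch {
    public static void main(String[] args) {
        long lastToken = 300; // token saved by a previous, interrupted build
        long prevToken = 300; // last checkpoint written in this run
        List<Long> tokensInOrder = List.of(100L, 200L, 300L, 400L, 400L, 500L);
        for (long token : tokensInOrder) {
            if (token <= lastToken)
                continue; // already built before the restart
            System.out.println("building key at token " + token);
            if (prevToken != token) {
                // corresponds to SystemKeyspace.updateViewBuildStatus(...):
                // checkpoint once per distinct token, not once per key
                System.out.println("  checkpoint -> " + token);
                prevToken = token;
            }
        }
    }
}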
use of org.apache.cassandra.dht.Range in project cassandra by apache.
the class RepairRunnable method runMayThrow.
protected void runMayThrow() throws Exception {
    final TraceState traceState;
    final UUID parentSession = UUIDGen.getTimeUUID();
    final String tag = "repair:" + cmd;
    final AtomicInteger progress = new AtomicInteger();
    // get valid column families, calculate neighbors, validation, prepare for repair + number of ranges to repair
    final int totalProgress = 4 + options.getRanges().size();
    String[] columnFamilies = options.getColumnFamilies().toArray(new String[options.getColumnFamilies().size()]);
    Iterable<ColumnFamilyStore> validColumnFamilies;
    try {
        validColumnFamilies = storageService.getValidColumnFamilies(false, false, keyspace, columnFamilies);
        progress.incrementAndGet();
    } catch (IllegalArgumentException e) {
        logger.error("Repair failed:", e);
        fireErrorAndComplete(tag, progress.get(), totalProgress, e.getMessage());
        return;
    }
    final long startTime = System.currentTimeMillis();
    String message = String.format("Starting repair command #%d (%s), repairing keyspace %s with %s", cmd, parentSession, keyspace, options);
    logger.info(message);
    if (options.isTraced()) {
        StringBuilder cfsb = new StringBuilder();
        for (ColumnFamilyStore cfs : validColumnFamilies)
            cfsb.append(", ").append(cfs.keyspace.getName()).append(".").append(cfs.name);
        UUID sessionId = Tracing.instance.newSession(Tracing.TraceType.REPAIR);
        traceState = Tracing.instance.begin("repair", ImmutableMap.of("keyspace", keyspace, "columnFamilies", cfsb.substring(2)));
        message = message + " tracing with " + sessionId;
        fireProgressEvent(tag, new ProgressEvent(ProgressEventType.START, 0, 100, message));
        Tracing.traceRepair(message);
        traceState.enableActivityNotification(tag);
        for (ProgressListener listener : listeners)
            traceState.addProgressListener(listener);
        Thread queryThread = createQueryThread(cmd, sessionId);
        queryThread.setName("RepairTracePolling");
        queryThread.start();
    } else {
        fireProgressEvent(tag, new ProgressEvent(ProgressEventType.START, 0, 100, message));
        traceState = null;
    }
    final Set<InetAddress> allNeighbors = new HashSet<>();
    List<Pair<Set<InetAddress>, ? extends Collection<Range<Token>>>> commonRanges = new ArrayList<>();
    // pre-calculate the output of getLocalRanges and pass it to getNeighbors,
    // so it is not recomputed for every range
    Collection<Range<Token>> keyspaceLocalRanges = storageService.getLocalRanges(keyspace);
    try {
        for (Range<Token> range : options.getRanges()) {
            Set<InetAddress> neighbors = ActiveRepairService.getNeighbors(keyspace, keyspaceLocalRanges, range, options.getDataCenters(), options.getHosts());
            addRangeToNeighbors(commonRanges, range, neighbors);
            allNeighbors.addAll(neighbors);
        }
        progress.incrementAndGet();
    } catch (IllegalArgumentException e) {
        logger.error("Repair failed:", e);
        fireErrorAndComplete(tag, progress.get(), totalProgress, e.getMessage());
        return;
    }
    // validate column families
    List<ColumnFamilyStore> columnFamilyStores = new ArrayList<>();
    try {
        Iterables.addAll(columnFamilyStores, validColumnFamilies);
        progress.incrementAndGet();
    } catch (IllegalArgumentException e) {
        fireErrorAndComplete(tag, progress.get(), totalProgress, e.getMessage());
        return;
    }
    String[] cfnames = new String[columnFamilyStores.size()];
    for (int i = 0; i < columnFamilyStores.size(); i++) {
        cfnames[i] = columnFamilyStores.get(i).name;
    }
    SystemDistributedKeyspace.startParentRepair(parentSession, keyspace, cfnames, options);
    long repairedAt;
    try {
        ActiveRepairService.instance.prepareForRepair(parentSession, FBUtilities.getBroadcastAddress(), allNeighbors, options, columnFamilyStores);
        repairedAt = ActiveRepairService.instance.getParentRepairSession(parentSession).getRepairedAt();
        progress.incrementAndGet();
    } catch (Throwable t) {
        SystemDistributedKeyspace.failParentRepair(parentSession, t);
        fireErrorAndComplete(tag, progress.get(), totalProgress, t.getMessage());
        return;
    }
    if (options.isIncremental()) {
        consistentRepair(parentSession, repairedAt, startTime, traceState, allNeighbors, commonRanges, cfnames);
    } else {
        normalRepair(parentSession, startTime, traceState, allNeighbors, commonRanges, cfnames);
    }
}
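commonRanges pairs each neighbor set with the ranges it covers, so that ranges repaired by the same replicas can later share a session; addRangeToNeighbors (not shown in this snippet) performs that grouping. A simplified sketch of the grouping idea, with strings standing in for ranges and endpoints (all names and values are hypothetical):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class CommonRangesSketch {
    public static void main(String[] args) {
        // token ranges as strings, neighbor sets as host names
        Map<String, Set<String>> neighborsByRange = new LinkedHashMap<>();
        neighborsByRange.put("(0,100]",   Set.of("hostA", "hostB"));
        neighborsByRange.put("(100,200]", Set.of("hostB", "hostC"));
        neighborsByRange.put("(200,300]", Set.of("hostA", "hostB"));

        // group ranges by identical neighbor set, preserving insertion order
        Map<Set<String>, List<String>> commonRanges = new LinkedHashMap<>();
        neighborsByRange.forEach((range, neighbors) ->
            commonRanges.computeIfAbsent(neighbors, k -> new ArrayList<>()).add(range));

        commonRanges.forEach((neighbors, ranges) ->
            System.out.println(neighbors + " -> " + ranges));
        // [hostA, hostB] -> [(0,100], (200,300]]
        // [hostB, hostC] -> [(100,200]]
    }
}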
use of org.apache.cassandra.dht.Range in project cassandra by apache.
the class PendingRangeMaps method pendingEndpointsFor.
public Collection<InetAddress> pendingEndpointsFor(Token token) {
    Set<InetAddress> endpoints = new HashSet<>();
    Range searchRange = new Range(token, token);
    // search the non-wrap-around maps
    NavigableMap<Range<Token>, List<InetAddress>> ascendingTailMap = ascendingMap.tailMap(searchRange, true);
    NavigableMap<Range<Token>, List<InetAddress>> descendingTailMap = descendingMap.tailMap(searchRange, false);
    // add the intersection of the two maps, iterating over the smaller one
    if (ascendingTailMap.size() < descendingTailMap.size()) {
        addIntersections(endpoints, ascendingTailMap, descendingTailMap);
    } else {
        addIntersections(endpoints, descendingTailMap, ascendingTailMap);
    }
    // search the wrap-around maps
    ascendingTailMap = ascendingMapForWrapAround.tailMap(searchRange, true);
    descendingTailMap = descendingMapForWrapAround.tailMap(searchRange, false);
    // every entry in both wrap-around tails contains the token, so add them all
    for (Map.Entry<Range<Token>, List<InetAddress>> entry : ascendingTailMap.entrySet()) {
        endpoints.addAll(entry.getValue());
    }
    for (Map.Entry<Range<Token>, List<InetAddress>> entry : descendingTailMap.entrySet()) {
        endpoints.addAll(entry.getValue());
    }
    return endpoints;
}
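The two-map lookup rewards a closer look. For a non-wrapping range (left, right], the range contains the token iff right >= token and left < token; ordering one map by right end (ascending) and the other by left end (descending) turns each half of that condition into a single tailMap call, and the intersection of the two tails is exactly the set of containing ranges. Wrap-around ranges live in separate maps where every tail entry already contains the token. A standalone sketch of the non-wrapping case with long tokens (types and values illustrative; it keys each map by the endpoint, which assumes distinct endpoints and is enough for a sketch):

import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;

public class PendingRangeLookupSketch {
    // simplified (left, right] range over long tokens, no wrap-around
    record R(long left, long right) {}

    public static void main(String[] args) {
        List<R> ranges = List.of(new R(0, 100), new R(50, 150), new R(120, 200));
        long token = 90;

        NavigableMap<Long, R> byRight = new TreeMap<>();                         // right ends, ascending
        NavigableMap<Long, R> byLeft = new TreeMap<>(Comparator.reverseOrder()); // left ends, descending
        for (R r : ranges) {
            byRight.put(r.right(), r);
            byLeft.put(r.left(), r);
        }

        // right >= token: inclusive tail of the ascending map
        Set<R> rightOk = new HashSet<>(byRight.tailMap(token, true).values());
        // left < token: exclusive tail of the descending map; intersect the two
        Set<R> containing = new HashSet<>();
        for (R r : byLeft.tailMap(token, false).values())
            if (rightOk.contains(r))
                containing.add(r);
        System.out.println(containing); // the ranges (0,100] and (50,150] contain token 90
    }
}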