Use of com.clearspring.analytics.stream.cardinality.ICardinality in the Apache Cassandra project.
Class SSTableReader, method estimateCompactionGain.
/**
 * Estimates how much of the keys we would keep if the sstables were compacted together.
 * <p>
 * The estimate is the cardinality of the merged per-sstable estimators divided by the sum of the
 * individual estimator cardinalities. SSTables whose compaction metadata cannot be read, or that
 * carry no cardinality estimator, are logged and left out of the estimate.
 *
 * @param overlapping sstables that would be compacted together
 * @return estimated key count after compaction divided by the summed key count before it;
 *         {@code 1} when the summed key count is zero (e.g. no estimator could be loaded)
 */
public static double estimateCompactionGain(Set<SSTableReader> overlapping) {
    Set<ICardinality> cardinalities = new HashSet<>(overlapping.size());
    for (SSTableReader sstable : overlapping) {
        try {
            CompactionMetadata metadata = (CompactionMetadata) sstable.descriptor.getMetadataSerializer().deserialize(sstable.descriptor, MetadataType.COMPACTION);
            // The metadata component may be unavailable; skip this sstable rather than
            // dereferencing null and aborting the whole estimate.
            if (metadata == null) {
                logger.warn("Reading cardinality from Statistics.db failed for {}", sstable.getFilename());
                continue;
            }
            ICardinality cardinality = metadata.cardinalityEstimator;
            if (cardinality != null) {
                cardinalities.add(cardinality);
            } else {
                logger.trace("Got a null cardinality estimator in: {}", sstable.getFilename());
            }
        } catch (IOException e) {
            logger.warn("Could not read up compaction metadata for {}", sstable, e);
        }
    }

    long totalKeyCountBefore = 0;
    for (ICardinality cardinality : cardinalities) {
        totalKeyCountBefore += cardinality.cardinality();
    }
    // Nothing to compare against: report "no gain" instead of dividing by zero.
    if (totalKeyCountBefore == 0) {
        return 1;
    }

    long totalKeyCountAfter = mergeCardinalities(cardinalities).cardinality();
    double gain = ((double) totalKeyCountAfter) / totalKeyCountBefore;
    logger.trace("Estimated compaction gain: {}/{}={}", totalKeyCountAfter, totalKeyCountBefore, gain);
    return gain;
}
Use of com.clearspring.analytics.stream.cardinality.ICardinality in the Apache Metron project.
Class HyperLogLogPlus, method merge.
/**
 * Combines this hllp set with the supplied hllp sets into a freshly created set.
 * Does not modify original sets.
 *
 * @param estimators hllp sets to merge
 * @return New merged hllp set
 * @throws IllegalArgumentException if the underlying estimators cannot be merged
 */
public HyperLogLogPlus merge(List<HyperLogLogPlus> estimators) {
    // Unwrap each wrapper to the underlying stream-lib estimator expected by the delegate's merge().
    com.clearspring.analytics.stream.cardinality.HyperLogLogPlus[] delegates = estimators.stream()
            .map(wrapper -> wrapper.hllp)
            .toArray(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus[]::new);

    final ICardinality union;
    try {
        union = hllp.merge(delegates);
    } catch (CardinalityMergeException e) {
        throw new IllegalArgumentException("Unable to merge estimators", e);
    }
    return new HyperLogLogPlus(p, sp, (com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) union);
}
Use of com.clearspring.analytics.stream.cardinality.ICardinality in the Apache Cassandra project.
Class SSTableReader, method getApproximateKeyCount.
/**
 * Calculate approximate key count.
 * If a cardinality estimator is available on all given sstables (ignoring those opened with
 * {@code OpenReason.EARLY}), then this method merges the estimators to estimate the key count.
 * If not, or if reading or merging the estimators fails, then this sums the per-sstable
 * {@code estimatedKeys()} values instead.
 *
 * @param sstables SSTables to calculate key count
 * @return estimated key count, or {@code -1} when {@code sstables} is empty
 */
public static long getApproximateKeyCount(Iterable<SSTableReader> sstables) {
    long count = -1;
    if (Iterables.isEmpty(sstables)) {
        return count;
    }

    boolean failed = false;
    ICardinality cardinality = null;
    for (SSTableReader sstable : sstables) {
        if (sstable.openReason == OpenReason.EARLY) {
            continue;
        }
        try {
            CompactionMetadata metadata = (CompactionMetadata) sstable.descriptor.getMetadataSerializer().deserialize(sstable.descriptor, MetadataType.COMPACTION);
            // If we can't load the CompactionMetadata, we are forced to estimate the keys using the index
            // summary. (CASSANDRA-10676)
            if (metadata == null) {
                logger.warn("Reading cardinality from Statistics.db failed for {}", sstable.getFilename());
                failed = true;
                break;
            }
            ICardinality estimator = metadata.cardinalityEstimator;
            // A missing estimator means this sstable's keys would silently be left out of the
            // merged estimate; fall back to the index summary for all sstables instead.
            if (estimator == null) {
                logger.trace("Got a null cardinality estimator in: {}", sstable.getFilename());
                failed = true;
                break;
            }
            cardinality = (cardinality == null) ? estimator : cardinality.merge(estimator);
        } catch (IOException e) {
            logger.warn("Reading cardinality from Statistics.db failed.", e);
            failed = true;
            break;
        } catch (CardinalityMergeException e) {
            logger.warn("Cardinality merge failed.", e);
            failed = true;
            break;
        }
    }

    if (cardinality != null && !failed) {
        count = cardinality.cardinality();
    }
    // if something went wrong above or cardinality is not available, calculate using index summary
    if (count < 0) {
        count = 0;
        for (SSTableReader sstable : sstables) {
            count += sstable.estimatedKeys();
        }
    }
    return count;
}
Aggregations