use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.
the class RowsTest method createBuilder.
/**
 * Creates an unsorted row builder for clustering {@code c}, pre-populated with primary key liveness
 * and, optionally, a simple cell and a complex (keyed) cell.
 *
 * @param c    the clustering of the row to start
 * @param now  the local time in seconds; converted to a write timestamp through {@code secondToTs}
 * @param vVal value for a live cell on column {@code v}, or {@code null} to skip that cell
 * @param mKey cell path key for column {@code m}; the complex cell is only added when both
 *             {@code mKey} and {@code mVal} are non-null
 * @param mVal value for the live cell on column {@code m} at path {@code mKey}
 * @return the populated builder (not yet built)
 */
private static Row.Builder createBuilder(Clustering c, int now, ByteBuffer vVal, ByteBuffer mKey, ByteBuffer mVal) {
    final long timestamp = secondToTs(now);
    final Row.Builder rowBuilder = BTreeRow.unsortedBuilder(now);

    rowBuilder.newRow(c);
    rowBuilder.addPrimaryKeyLivenessInfo(LivenessInfo.create(timestamp, now));

    final boolean hasSimpleValue = vVal != null;
    if (hasSimpleValue) {
        rowBuilder.addCell(BufferCell.live(v, timestamp, vVal));
    }

    final boolean hasComplexEntry = mKey != null && mVal != null;
    if (hasComplexEntry) {
        // The complex deletion is stamped one tick before the cell so that it does not share the cell's timestamp.
        final DeletionTime complexDeletion = new DeletionTime(timestamp - 1, now);
        rowBuilder.addComplexDeletion(m, complexDeletion);
        rowBuilder.addCell(BufferCell.live(m, timestamp, mVal, CellPath.create(mKey)));
    }

    return rowBuilder;
}
use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.
the class BTreeRow method updateAllTimestamp.
/**
 * Returns a copy of this row in which every timestamp for live data has been replaced by
 * {@code newTimestamp} and every deletion timestamp by {@code newTimestamp - 1}.
 *
 * This exists for the Paxos path, see {@link PartitionUpdate#updateAllTimestamp} for additional details.
 *
 * @param newTimestamp the timestamp to apply to live data
 * @return the row produced by {@code transformAndFilter} with the updated liveness, deletion and column data
 */
public Row updateAllTimestamp(long newTimestamp) {
    // An empty primary key liveness is kept as is; otherwise it is re-stamped.
    LivenessInfo updatedLiveness = primaryKeyLivenessInfo;
    if (!primaryKeyLivenessInfo.isEmpty()) {
        updatedLiveness = primaryKeyLivenessInfo.withUpdatedTimestamp(newTimestamp);
    }

    final Deletion updatedDeletion;
    if (deletion.isLive() || (deletion.isShadowable() && !primaryKeyLivenessInfo.isEmpty())) {
        // Either there is no deletion, or it is shadowable and the row has a timestamp: since the deletion
        // timestamp would be forced below the row one, the deletion is simply dropped.
        updatedDeletion = Deletion.LIVE;
    } else {
        final DeletionTime shiftedTime = new DeletionTime(newTimestamp - 1, deletion.time().localDeletionTime());
        updatedDeletion = new Deletion(shiftedTime, deletion.isShadowable());
    }

    return transformAndFilter(updatedLiveness, updatedDeletion, columnData -> columnData.updateAllTimestamp(newTimestamp));
}
use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.
the class ComplexColumnData method filter.
/**
 * Filters the cells and the complex deletion of this column data.
 *
 * @param filter         the column filter; used to obtain a per-column cell tester and to know whether
 *                       this column is queried
 * @param activeDeletion a deletion already in effect; it replaces this column's complex deletion with
 *                       {@code DeletionTime.LIVE} when it supersedes it, and removes the cells it deletes
 * @param dropped        dropped-column information, or {@code null}; cells whose timestamp is not greater
 *                       than {@code dropped.droppedTime} are removed
 * @param rowLiveness    the row liveness; its timestamp is compared against cells that are not queried
 * @return {@code this} when nothing can be filtered out, otherwise the result of {@code transformAndFilter}
 */
public ComplexColumnData filter(ColumnFilter filter, DeletionTime activeDeletion, DroppedColumn dropped, LivenessInfo rowLiveness) {
ColumnFilter.Tester cellTester = filter.newTester(column);
boolean isQueriedColumn = filter.fetchedColumnIsQueried(column);
// Fast path: no per-cell tester, no active deletion, no dropped column and the column is queried,
// so none of the per-cell conditions below can apply.
if (cellTester == null && activeDeletion.isLive() && dropped == null && isQueriedColumn)
return this;
// The complex deletion is only kept if the active deletion does not supersede it.
DeletionTime newDeletion = activeDeletion.supersedes(complexDeletion) ? DeletionTime.LIVE : complexDeletion;
// NOTE(review): the lambda returns null for cells to discard; presumably transformAndFilter drops those - confirm.
return transformAndFilter(newDeletion, (cell) -> {
CellPath path = cell.path();
// The cell was written no later than the time recorded for the dropped column.
boolean isForDropped = dropped != null && cell.timestamp() <= dropped.droppedTime;
boolean isShadowed = activeDeletion.deletes(cell);
// Without a tester, every cell counts as fetched (and as queried, if the column itself is queried).
boolean isFetchedCell = cellTester == null || cellTester.fetches(path);
boolean isQueriedCell = isQueriedColumn && isFetchedCell && (cellTester == null || cellTester.fetchedCellIsQueried(path));
// A cell is skippable if it is not fetched at all, or if it is not queried and is older than the row liveness.
boolean isSkippableCell = !isFetchedCell || (!isQueriedCell && cell.timestamp() < rowLiveness.timestamp());
if (isForDropped || isShadowed || isSkippableCell)
return null;
// Cells that are fetched but not queried are returned with their value skipped.
// NOTE(review): the original comment here appears truncated in this excerpt; it refers to avoiding
// discrepancies between sstables and memtables data, i.e. ones resulting in a digest mismatch.
return isQueriedCell ? cell : cell.withSkippedValue();
});
}
use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.
the class ReplicaFilteringProtection method mergeController.
/**
 * Creates the merge listener used for the first merge of the per-replica query results. The listener skips
 * every merged row for which at least one replica has no version, pessimistically treating such rows as
 * outdated, so that enough results are fetched from the replicas and no potentially outdated result is missed.
 * <p>
 * While merging, the listener records both the data it saw from each replica and the primary keys of the rows
 * considered outdated. Once the merge has been consumed, calls to
 * {@link #queryProtectedPartitions(PartitionIterator, int)} rely on that recorded data to return a copy of the
 * data originally collected from the given replica, completed with the potentially outdated rows.
 *
 * @return the partition-level merge listener
 */
UnfilteredPartitionIterators.MergeListener mergeController() {
    return new UnfilteredPartitionIterators.MergeListener() {
        @Override
        public UnfilteredRowIterators.MergeListener getRowMergeListener(DecoratedKey partitionKey, List<UnfilteredRowIterator> versions) {
            // One builder per source, all sharing the columns and encoding stats merged across the versions.
            final List<PartitionBuilder> partitionBuilders = new ArrayList<>(sources.size());
            final RegularAndStaticColumns mergedColumns = columns(versions);
            final EncodingStats mergedStats = EncodingStats.merge(versions, NULL_TO_NO_STATS);
            for (int idx = 0; idx < sources.size(); idx++) {
                partitionBuilders.add(new PartitionBuilder(partitionKey, sources.get(idx), mergedColumns, mergedStats));
            }

            return new UnfilteredRowIterators.MergeListener() {
                @Override
                public void onMergedPartitionLevelDeletion(DeletionTime mergedDeletion, DeletionTime[] versions) {
                    // Remember each replica's partition deletion so its original iterator can be rebuilt later.
                    for (int idx = 0; idx < versions.length; idx++) {
                        partitionBuilders.get(idx).setDeletionTime(versions[idx]);
                    }
                }

                @Override
                public Row onMergedRows(Row merged, Row[] versions) {
                    // Remember each replica's row version so its original iterator can be rebuilt later.
                    for (int idx = 0; idx < versions.length; idx++) {
                        partitionBuilders.get(idx).addRow(versions[idx]);
                    }

                    if (merged.isEmpty()) {
                        return merged;
                    }

                    final boolean staticRow = merged.isStatic();
                    boolean potentiallyOutdated = false;
                    for (int idx = 0; idx < versions.length; idx++) {
                        final Row replicaRow = versions[idx];
                        final boolean hasVersion = replicaRow != null && !(staticRow && replicaRow.isEmpty());
                        if (hasVersion) {
                            continue;
                        }
                        // This replica sent nothing for the row: flag the row and schedule it for fetching.
                        potentiallyOutdated = true;
                        partitionBuilders.get(idx).addToFetch(merged);
                    }

                    // A row missing from any replica is pessimistically skipped during this first merge, so that
                    // enough data is looked at to ultimately fulfill the query limit.
                    if (potentiallyOutdated) {
                        return null;
                    }
                    return merged;
                }

                @Override
                public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions) {
                    // Remember each replica's marker version so its original iterator can be rebuilt later.
                    for (int idx = 0; idx < versions.length; idx++) {
                        partitionBuilders.get(idx).addRangeTombstoneMarker(versions[idx]);
                    }
                }

                @Override
                public void close() {
                    // Hand every per-source builder over to the partitions collected for that source.
                    for (int idx = 0; idx < sources.size(); idx++) {
                        originalPartitions.get(idx).add(partitionBuilders.get(idx));
                    }
                }
            };
        }

        @Override
        public void close() {
            // If we hit the failure threshold before consuming a single partition, record the current rows cached.
            tableMetrics.rfpRowsCachedPerQuery.update(Math.max(currentRowsCached, maxRowsCached));
        }
    };
}
use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.
the class RowIteratorMergeListener method onMergedRangeTombstoneMarkers.
/**
 * Handles a merged range tombstone marker together with the per-source markers it was merged from, updating
 * for each source that is written back to what that source considers its current deletion
 * ({@code sourceDeletionTime}) and the range tombstone repair currently open for it ({@code markerToRepair}).
 * After all sources have been processed, {@code mergedDeletionTime} is updated from {@code merged}.
 *
 * @param merged   the merged marker, or {@code null} when the merge produced no marker at this position
 * @param versions the marker of each source at this position; an entry is {@code null} when that source has
 *                 no marker here
 */
public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions) {
// The current deletion as of dealing with this marker.
DeletionTime currentDeletion = currentDeletion();
for (int i = 0; i < versions.length; i++) {
// we are not collecting a mutation for this version/source, skip;
if (!writeBackTo.get(i))
continue;
RangeTombstoneMarker marker = versions[i];
// Update what the source now thinks is the current deletion
if (marker != null)
sourceDeletionTime[i] = marker.isOpen(isReversed) ? marker.openDeletionTime(isReversed) : null;
// If merged == null, some of the source is opening or closing a marker
if (merged == null) {
// but if it's not this source, move to the next one
if (marker == null)
continue;
// This source has a marker with nothing corresponding in merged, so a deletion must currently be
// in effect; the assertion below enforces that.
// NOTE(review): the original explanatory comment appears truncated in this excerpt; it ended with
// "(if it isn't we just have nothing to do for that marker)" - restore the full text from upstream.
assert !currentDeletion.isLive() : currentDeletion.toString();
// Is the source up to date on deletion? It's up to date if it doesn't have an open RT repair
// nor an "active" partition level deletion (where "active" means that it's greater or equal
// to the current deletion: if the source has a repaired partition deletion lower than the
// current deletion, this means the current deletion is due to a previously open range tombstone,
// and if the source isn't currently repaired for that RT, then it means it's up to date on it).
DeletionTime partitionRepairDeletion = partitionLevelRepairDeletion(i);
if (markerToRepair[i] == null && currentDeletion.supersedes(partitionRepairDeletion)) {
/*
 * Since there is an ongoing merged deletion, the only two ways we don't have an open repair for
 * this source are that:
 *
 * 1) it had a range open with the same deletion as current marker, and the marker is coming from
 *    a short read protection response - repeating the open RT bound, or
 * 2) it had a range open with the same deletion as current marker, and the marker is closing it.
 */
if (// (1)
!marker.isBoundary() && marker.isOpen(isReversed)) {
assert currentDeletion.equals(marker.openDeletionTime(isReversed)) : String.format("currentDeletion=%s, marker=%s", currentDeletion, marker.toString(command.metadata()));
} else // (2)
{
assert marker.isClose(isReversed) && currentDeletion.equals(marker.closeDeletionTime(isReversed)) : String.format("currentDeletion=%s, marker=%s", currentDeletion, marker.toString(command.metadata()));
}
// Unless the marker opens a deletion equal to the current one, record the inverted close bound of
// this marker as the start of a repair for this source, i.e. the source needs repairing
// from that point on.
if (!(marker.isOpen(isReversed) && currentDeletion.equals(marker.openDeletionTime(isReversed))))
markerToRepair[i] = marker.closeBound(isReversed).invert();
} else // In case 2) above, we only have something to do if the source is up-to-date after that point
// (which, since the source isn't up-to-date before that point, means we're opening a new deletion
// that is equal to the current one).
{
if (markerToRepair[i] == null) {
// No repair is open for this source, so the current deletion is not superseding the partition
// level repair deletion; the assertion checks both share the same local deletion time.
// NOTE(review): the original comment appears truncated; it concluded that since a partition
// deletion covers an entire partition, we do not include it into repair.
assert currentDeletion.localDeletionTime() == partitionRepairDeletion.localDeletionTime();
} else if (marker.isOpen(isReversed) && currentDeletion.equals(marker.openDeletionTime(isReversed))) {
// The source becomes up to date from this point on: close the repair that was open for it.
closeOpenMarker(i, marker.openBound(isReversed).invert());
}
}
} else {
if (merged.isClose(isReversed)) {
// We're closing the merged range. If a repair marker has been recorded for this
// source, close and add said range to the repair to send.
if (markerToRepair[i] != null)
closeOpenMarker(i, merged.closeBound(isReversed));
}
if (merged.isOpen(isReversed)) {
// If we're opening a new merged range (or just switching deletion), then unless the source
// is up to date on that deletion (note that we've updated what the source deletion is
// above), we'll have to send the range to the source.
DeletionTime newDeletion = merged.openDeletionTime(isReversed);
DeletionTime sourceDeletion = sourceDeletionTime[i];
if (!newDeletion.equals(sourceDeletion))
markerToRepair[i] = merged.openBound(isReversed);
}
}
}
// Track the deletion opened by the merged marker; it is reset to null when the merged marker opens nothing.
if (merged != null)
mergedDeletionTime = merged.isOpen(isReversed) ? merged.openDeletionTime(isReversed) : null;
}
Aggregations