Search in sources :

Example 1 with DeletionTime

use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.

the class RowsTest method createBuilder.

/**
 * Starts a row builder for clustering {@code c}, stamped with a timestamp derived from {@code now}.
 * <p>
 * The builder always receives the primary key liveness info. A live cell for column {@code v} is added
 * only when {@code vVal} is non-null. A complex deletion on column {@code m} (one timestamp unit older
 * than the row) followed by a single live cell at path {@code mKey} is added only when both
 * {@code mKey} and {@code mVal} are non-null.
 *
 * @param c    clustering of the row to build
 * @param now  time in seconds; converted to the write timestamp through {@code secondToTs}
 * @param vVal value for column {@code v}, or {@code null} to leave the column out
 * @param mKey cell path key for column {@code m}, or {@code null} to leave the column out
 * @param mVal cell value for column {@code m}, or {@code null} to leave the column out
 * @return the populated (not yet built) row builder
 */
private static Row.Builder createBuilder(Clustering c, int now, ByteBuffer vVal, ByteBuffer mKey, ByteBuffer mVal) {
    final long timestamp = secondToTs(now);
    final boolean hasSimpleValue = vVal != null;
    final boolean hasMapEntry = mKey != null && mVal != null;

    Row.Builder rowBuilder = BTreeRow.unsortedBuilder(now);
    rowBuilder.newRow(c);
    rowBuilder.addPrimaryKeyLivenessInfo(LivenessInfo.create(timestamp, now));

    if (hasSimpleValue) {
        rowBuilder.addCell(BufferCell.live(v, timestamp, vVal));
    }

    if (hasMapEntry) {
        // The complex deletion is placed just before the cell's timestamp so that it does not shadow it.
        DeletionTime mapDeletion = new DeletionTime(timestamp - 1, now);
        rowBuilder.addComplexDeletion(m, mapDeletion);
        rowBuilder.addCell(BufferCell.live(m, timestamp, mVal, CellPath.create(mKey)));
    }

    return rowBuilder;
}
Also used : DeletionTime(org.apache.cassandra.db.DeletionTime)

Example 2 with DeletionTime

use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.

the class BTreeRow method updateAllTimestamp.

/**
 * Returns a copy of the row where all timestamps for live data have been replaced by {@code newTimestamp}
 * and all deletion timestamps by {@code newTimestamp - 1}.
 *
 * This exists for the Paxos path, see {@link PartitionUpdate#updateAllTimestamp} for additional details.
 *
 * @param newTimestamp the timestamp to apply to live data; deletions get {@code newTimestamp - 1}
 * @return the row produced by {@code transformAndFilter} with the updated liveness, deletion and column data
 */
public Row updateAllTimestamp(long newTimestamp) {
    // An empty primary key liveness carries no timestamp to rewrite, so it is reused as-is.
    final LivenessInfo updatedInfo;
    if (primaryKeyLivenessInfo.isEmpty()) {
        updatedInfo = primaryKeyLivenessInfo;
    } else {
        updatedInfo = primaryKeyLivenessInfo.withUpdatedTimestamp(newTimestamp);
    }

    final Deletion updatedDeletion;
    if (deletion.isLive() || (deletion.isShadowable() && !primaryKeyLivenessInfo.isEmpty())) {
        // Either there is no deletion at all, or it is shadowable and the row has a timestamp: in the
        // latter case the deletion timestamp would be forced below the row's one, so the deletion is dropped.
        updatedDeletion = Deletion.LIVE;
    } else {
        DeletionTime shiftedTime = new DeletionTime(newTimestamp - 1, deletion.time().localDeletionTime());
        updatedDeletion = new Deletion(shiftedTime, deletion.isShadowable());
    }

    return transformAndFilter(updatedInfo, updatedDeletion, columnData -> columnData.updateAllTimestamp(newTimestamp));
}
Also used : DeletionTime(org.apache.cassandra.db.DeletionTime) LivenessInfo(org.apache.cassandra.db.LivenessInfo)

Example 3 with DeletionTime

use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.

the class ComplexColumnData method filter.

/**
 * Filters this complex column's deletion and cells against a column filter, an active deletion, an
 * optional dropped-column record and the row liveness.
 * <p>
 * Returns {@code this} unchanged when there is no per-cell tester, the active deletion is live, the
 * column was not dropped and the column is queried. Otherwise the complex deletion is replaced by
 * {@link DeletionTime#LIVE} if the active deletion supersedes it, and each cell is either removed,
 * kept, or kept with its value skipped.
 *
 * @param filter         column filter providing the per-cell tester and the "is queried" answer for this column
 * @param activeDeletion deletion currently in effect; cells it deletes are removed
 * @param dropped        dropped-column record, or {@code null}; cells with a timestamp at or before
 *                       {@code dropped.droppedTime} are removed
 * @param rowLiveness    row liveness; non-queried cells strictly older than its timestamp are removed
 * @return {@code this} when nothing needs filtering, otherwise the result of {@code transformAndFilter}
 */
public ComplexColumnData filter(ColumnFilter filter, DeletionTime activeDeletion, DroppedColumn dropped, LivenessInfo rowLiveness) {
    ColumnFilter.Tester tester = filter.newTester(column);
    boolean columnIsQueried = filter.fetchedColumnIsQueried(column);

    boolean nothingToFilter = tester == null && activeDeletion.isLive() && dropped == null && columnIsQueried;
    if (nothingToFilter) {
        return this;
    }

    DeletionTime retainedDeletion;
    if (activeDeletion.supersedes(complexDeletion)) {
        retainedDeletion = DeletionTime.LIVE;
    } else {
        retainedDeletion = complexDeletion;
    }

    return transformAndFilter(retainedDeletion, (cell) -> {
        CellPath cellPath = cell.path();
        boolean writtenBeforeDrop = dropped != null && cell.timestamp() <= dropped.droppedTime;
        boolean shadowed = activeDeletion.deletes(cell);
        boolean fetched = tester == null || tester.fetches(cellPath);
        boolean queried = columnIsQueried && fetched && (tester == null || tester.fetchedCellIsQueried(cellPath));
        boolean skippable = !fetched || (!queried && cell.timestamp() < rowLiveness.timestamp());

        if (writtenBeforeDrop || shadowed || skippable) {
            return null;
        }

        // Fetched-but-not-queried cells keep their place but lose their value.
        // NOTE(review): the surviving comment fragment says this avoids discrepancies between sstables
        // and memtables data that would result in a digest mismatch - confirm against upstream.
        if (queried) {
            return cell;
        }
        return cell.withSkippedValue();
    });
}
Also used : DeletionTime(org.apache.cassandra.db.DeletionTime) ColumnFilter(org.apache.cassandra.db.filter.ColumnFilter)

Example 4 with DeletionTime

use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.

the class ReplicaFilteringProtection method mergeController.

/**
 * Builds the merge listener used for the first merge of the per-replica query results. The listener
 * drops every merged row for which at least one replica has no version, pessimistically treating such
 * rows as outdated, so that enough results are fetched from the replicas and no potentially outdated
 * result is missed.
 * <p>
 * While merging, the listener records both the data it accepts and the rows considered outdated.
 * Once the merge has been consumed, {@link #queryProtectedPartitions(PartitionIterator, int)} relies on
 * that recorded data to return a copy of what each replica originally sent, completed with the
 * potentially outdated rows.
 *
 * @return the partition-level merge listener
 */
UnfilteredPartitionIterators.MergeListener mergeController() {
    return new UnfilteredPartitionIterators.MergeListener() {

        @Override
        public void close() {
            // The failure threshold may be hit before a single partition has been consumed, so the
            // rows currently cached are considered alongside the recorded maximum.
            tableMetrics.rfpRowsCachedPerQuery.update(Math.max(currentRowsCached, maxRowsCached));
        }

        @Override
        public UnfilteredRowIterators.MergeListener getRowMergeListener(DecoratedKey partitionKey, List<UnfilteredRowIterator> versions) {
            // One builder per source, stored at the source's index.
            List<PartitionBuilder> builders = new ArrayList<>(sources.size());
            RegularAndStaticColumns columns = columns(versions);
            EncodingStats stats = EncodingStats.merge(versions, NULL_TO_NO_STATS);
            for (int idx = 0; idx < sources.size(); idx++) {
                builders.add(new PartitionBuilder(partitionKey, sources.get(idx), columns, stats));
            }

            return new UnfilteredRowIterators.MergeListener() {

                @Override
                public void onMergedPartitionLevelDeletion(DeletionTime mergedDeletion, DeletionTime[] versions) {
                    // Remember each source's partition deletion so its original iterator can be rebuilt.
                    for (int idx = 0; idx < versions.length; idx++) {
                        builders.get(idx).setDeletionTime(versions[idx]);
                    }
                }

                @Override
                public Row onMergedRows(Row merged, Row[] versions) {
                    // Remember each source's row version so its original iterator can be rebuilt.
                    for (int idx = 0; idx < versions.length; idx++) {
                        builders.get(idx).addRow(versions[idx]);
                    }

                    if (merged.isEmpty()) {
                        return merged;
                    }

                    final boolean staticRow = merged.isStatic();
                    boolean missingOnSomeSource = false;
                    for (int idx = 0; idx < versions.length; idx++) {
                        Row sourceRow = versions[idx];
                        boolean absent = sourceRow == null || (staticRow && sourceRow.isEmpty());
                        if (!absent) {
                            continue;
                        }
                        // This source has no version of the row: flag it and schedule a fetch from it.
                        missingOnSomeSource = true;
                        builders.get(idx).addToFetch(merged);
                    }

                    // Potentially outdated rows are withheld from the merged output, so that enough data
                    // is looked at to ultimately fulfill the query limit.
                    if (missingOnSomeSource) {
                        return null;
                    }
                    return merged;
                }

                @Override
                public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions) {
                    // Remember each source's marker so its original iterator can be rebuilt.
                    for (int idx = 0; idx < versions.length; idx++) {
                        builders.get(idx).addRangeTombstoneMarker(versions[idx]);
                    }
                }

                @Override
                public void close() {
                    // Hand every source's collected partition over to the per-source partition store.
                    for (int idx = 0; idx < sources.size(); idx++) {
                        originalPartitions.get(idx).add(builders.get(idx));
                    }
                }
            };
        }
    };
}
Also used : UnfilteredRowIterators(org.apache.cassandra.db.rows.UnfilteredRowIterators) DecoratedKey(org.apache.cassandra.db.DecoratedKey) DeletionTime(org.apache.cassandra.db.DeletionTime) ArrayList(java.util.ArrayList) EncodingStats(org.apache.cassandra.db.rows.EncodingStats) RangeTombstoneMarker(org.apache.cassandra.db.rows.RangeTombstoneMarker) List(java.util.List) ArrayList(java.util.ArrayList) Row(org.apache.cassandra.db.rows.Row) RegularAndStaticColumns(org.apache.cassandra.db.RegularAndStaticColumns)

Example 5 with DeletionTime

use of org.apache.cassandra.db.DeletionTime in project cassandra by apache.

the class RowIteratorMergeListener method onMergedRangeTombstoneMarkers.

/**
 * Processes a merged range tombstone marker together with the per-source markers at the same position.
 * <p>
 * For every source flagged in {@code writeBackTo}, this updates what the source considers its open
 * deletion ({@code sourceDeletionTime}) and decides whether a range tombstone repair for that source
 * must be started ({@code markerToRepair}) or closed ({@code closeOpenMarker}). Once all sources have
 * been handled, {@code mergedDeletionTime} is updated from {@code merged} when it is non-null.
 *
 * @param merged   the merged marker, or {@code null} when only some sources open or close a marker here
 * @param versions the marker of each source at this position, indexed by source; entries may be {@code null}
 */
public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions) {
    // The current deletion as of dealing with this marker.
    DeletionTime currentDeletion = currentDeletion();
    for (int i = 0; i < versions.length; i++) {
        // we are not collecting a mutation for this version/source, skip;
        if (!writeBackTo.get(i))
            continue;
        RangeTombstoneMarker marker = versions[i];
        // Update what the source now thinks is the current deletion
        // (null when the source's marker does not open a range in the iteration direction).
        if (marker != null)
            sourceDeletionTime[i] = marker.isOpen(isReversed) ? marker.openDeletionTime(isReversed) : null;
        // If merged == null, some of the source is opening or closing a marker
        if (merged == null) {
            // but if it's not this source, move to the next one
            if (marker == null)
                continue;
            // This source has a marker with nothing corresponding in merged, so a deletion is expected to be
            // currently active; the assertion below enforces that. What remains to be determined is whether
            // the source has an open range tombstone to repair (otherwise we just have nothing to do for
            // that marker).
            assert !currentDeletion.isLive() : currentDeletion.toString();
            // Is the source up to date on deletion? It's up to date if it doesn't have an open RT repair
            // nor an "active" partition level deletion (where "active" means that it's greater or equal
            // to the current deletion: if the source has a repaired partition deletion lower than the
            // current deletion, this means the current deletion is due to a previously open range tombstone,
            // and if the source isn't currently repaired for that RT, then it means it's up to date on it).
            DeletionTime partitionRepairDeletion = partitionLevelRepairDeletion(i);
            if (markerToRepair[i] == null && currentDeletion.supersedes(partitionRepairDeletion)) {
                /*
                     * Since there is an ongoing merged deletion, the only two ways we don't have an open repair for
                     * this source are that:
                     *
                     * 1) it had a range open with the same deletion as current marker, and the marker is coming from
                     *    a short read protection response - repeating the open RT bound, or
                     * 2) it had a range open with the same deletion as current marker, and the marker is closing it.
                     */
                if (// (1)
                !marker.isBoundary() && marker.isOpen(isReversed)) {
                    assert currentDeletion.equals(marker.openDeletionTime(isReversed)) : String.format("currentDeletion=%s, marker=%s", currentDeletion, marker.toString(command.metadata()));
                } else // (2)
                {
                    assert marker.isClose(isReversed) && currentDeletion.equals(marker.closeDeletionTime(isReversed)) : String.format("currentDeletion=%s, marker=%s", currentDeletion, marker.toString(command.metadata()));
                }
                // Unless the marker (re)opens a range whose deletion is still equal to the current deletion,
                // the source has to be repaired from that point on: start a repair range at the inverted
                // close bound of its marker.
                if (!(marker.isOpen(isReversed) && currentDeletion.equals(marker.openDeletionTime(isReversed))))
                    markerToRepair[i] = marker.closeBound(isReversed).invert();
            } else // In case 2) above, we only have something to do if the source is up-to-date after that point
            // (which, since the source isn't up-to-date before that point, means we're opening a new deletion
            // that is equal to the current one).
            {
                if (markerToRepair[i] == null) {
                    // No repair range is open for this source. NOTE(review): per the surviving comment
                    // fragment, a source already repaired for an entire partition is not included into the
                    // repair; the assertion checks that the current deletion and the partition-level repair
                    // deletion share the same local deletion time.
                    assert currentDeletion.localDeletionTime() == partitionRepairDeletion.localDeletionTime();
                } else if (marker.isOpen(isReversed) && currentDeletion.equals(marker.openDeletionTime(isReversed))) {
                    // The source opens a deletion equal to the current one: the pending repair range ends
                    // at the inverted open bound of its marker.
                    closeOpenMarker(i, marker.openBound(isReversed).invert());
                }
            }
        } else {
            if (merged.isClose(isReversed)) {
                // The merged range is closing. If a repair range has been recorded as open for this
                // source, close and add said range to the repair to send.
                if (markerToRepair[i] != null)
                    closeOpenMarker(i, merged.closeBound(isReversed));
            }
            if (merged.isOpen(isReversed)) {
                // If we're opening a new merged range (or just switching deletion), then unless the source
                // is up to date on that deletion (note that we've updated what the source deletion is
                // above), we'll have to send the range to the source.
                DeletionTime newDeletion = merged.openDeletionTime(isReversed);
                DeletionTime sourceDeletion = sourceDeletionTime[i];
                if (!newDeletion.equals(sourceDeletion))
                    markerToRepair[i] = merged.openBound(isReversed);
            }
        }
    }
    // Track the merged open deletion for subsequent calls (null once the merged range is closed).
    if (merged != null)
        mergedDeletionTime = merged.isOpen(isReversed) ? merged.openDeletionTime(isReversed) : null;
}
Also used : DeletionTime(org.apache.cassandra.db.DeletionTime) RangeTombstoneMarker(org.apache.cassandra.db.rows.RangeTombstoneMarker)

Aggregations

DeletionTime (org.apache.cassandra.db.DeletionTime)32 Test (org.junit.Test)21 UnfilteredPartitionIterator (org.apache.cassandra.db.partitions.UnfilteredPartitionIterator)10 RowUpdateBuilder (org.apache.cassandra.db.RowUpdateBuilder)8 PartitionIterator (org.apache.cassandra.db.partitions.PartitionIterator)8 EndpointsForRange (org.apache.cassandra.locator.EndpointsForRange)8 InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort)8 Mutation (org.apache.cassandra.db.Mutation)7 RangeTombstone (org.apache.cassandra.db.RangeTombstone)7 Row (org.apache.cassandra.db.rows.Row)7 LivenessInfo (org.apache.cassandra.db.LivenessInfo)6 BTreeRow (org.apache.cassandra.db.rows.BTreeRow)6 ArrayList (java.util.ArrayList)5 ReadCommand (org.apache.cassandra.db.ReadCommand)4 ComplexColumnData (org.apache.cassandra.db.rows.ComplexColumnData)4 RowIterator (org.apache.cassandra.db.rows.RowIterator)4 TestableReadRepair (org.apache.cassandra.service.reads.repair.TestableReadRepair)4 ByteBuffer (java.nio.ByteBuffer)2 List (java.util.List)2 ColumnIdentifier (org.apache.cassandra.cql3.ColumnIdentifier)2