
Example 11 with TableId

use of org.apache.cassandra.schema.TableId in project cassandra by apache.

the class Keyspace method applyInternal.

/**
     * This method appends a row to the global CommitLog, then updates memtables and indexes.
     *
     * @param mutation       the row to write.  Must not be modified after calling apply, since commitlog append
     *                       may happen concurrently, depending on the CL Executor type.
     * @param writeCommitLog false to disable commitlog append entirely
     * @param updateIndexes  false to disable index updates (used by CollationController "defragmenting")
     * @param isDroppable    true if this should throw WriteTimeoutException if it does not acquire lock within write_request_timeout_in_ms
     * @param isDeferrable   true if caller is not waiting for future to complete, so that future may be deferred
     */
private CompletableFuture<?> applyInternal(final Mutation mutation, final boolean writeCommitLog, boolean updateIndexes, boolean isDroppable, boolean isDeferrable, CompletableFuture<?> future) {
    if (TEST_FAIL_WRITES && metadata.name.equals(TEST_FAIL_WRITES_KS))
        throw new RuntimeException("Testing write failures");
    Lock[] locks = null;
    boolean requiresViewUpdate = updateIndexes && viewManager.updatesAffectView(Collections.singleton(mutation), false);
    if (requiresViewUpdate) {
        mutation.viewLockAcquireStart.compareAndSet(0L, System.currentTimeMillis());
        // the order of lock acquisition doesn't matter (from a deadlock perspective) because we only use tryLock()
        Collection<TableId> tableIds = mutation.getTableIds();
        Iterator<TableId> idIterator = tableIds.iterator();
        locks = new Lock[tableIds.size()];
        for (int i = 0; i < tableIds.size(); i++) {
            TableId tableId = idIterator.next();
            int lockKey = Objects.hash(mutation.key().getKey(), tableId);
            while (true) {
                Lock lock = null;
                if (TEST_FAIL_MV_LOCKS_COUNT == 0)
                    lock = ViewManager.acquireLockFor(lockKey);
                else
                    TEST_FAIL_MV_LOCKS_COUNT--;
                if (lock == null) {
                    // throw a WriteTimeoutException (WTE) only if the request is droppable
                    if (isDroppable && (System.currentTimeMillis() - mutation.createdAt) > DatabaseDescriptor.getWriteRpcTimeout()) {
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        logger.trace("Could not acquire lock for {} and table {}", ByteBufferUtil.bytesToHex(mutation.key().getKey()), columnFamilyStores.get(tableId).name);
                        Tracing.trace("Could not acquire MV lock");
                        if (future != null) {
                            future.completeExceptionally(new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1));
                            return future;
                        } else
                            throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
                    } else if (isDeferrable) {
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        // This view update can't happen right now, so rather than keeping this thread busy
                        // we re-apply ourselves to the queue and try again later
                        final CompletableFuture<?> mark = future;
                        StageManager.getStage(Stage.MUTATION).execute(() -> applyInternal(mutation, writeCommitLog, true, isDroppable, true, mark));
                        return future;
                    } else {
                        // Retry the lock on this same thread: the mutation is neither droppable nor
                        // deferrable, so handing it back to the queue could leave all MutationStage workers
                        // being blocked by waiting for futures which will never be processed as all workers are blocked
                        try {
                            // Wait a little bit before retrying to lock
                            Thread.sleep(10);
                        } catch (InterruptedException e) {
                            // Just continue
                        }
                        continue;
                    }
                } else {
                    locks[i] = lock;
                }
                break;
            }
        }
        long acquireTime = System.currentTimeMillis() - mutation.viewLockAcquireStart.get();
        // Bulk non-droppable operations (e.g. commitlog replay, hint delivery) are not measured
        if (isDroppable) {
            for (TableId tableId : tableIds) columnFamilyStores.get(tableId).metric.viewLockAcquireTime.update(acquireTime, TimeUnit.MILLISECONDS);
        }
    }
    int nowInSec = FBUtilities.nowInSeconds();
    try (OpOrder.Group opGroup = writeOrder.start()) {
        // write the mutation to the commitlog and memtables
        CommitLogPosition commitLogPosition = null;
        if (writeCommitLog) {
            Tracing.trace("Appending to commitlog");
            commitLogPosition = CommitLog.instance.add(mutation);
        }
        for (PartitionUpdate upd : mutation.getPartitionUpdates()) {
            ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id);
            if (cfs == null) {
                logger.error("Attempting to mutate non-existant table {} ({}.{})", upd.metadata().id, upd.metadata().keyspace, upd.metadata().name);
                continue;
            }
            AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
            if (requiresViewUpdate) {
                try {
                    Tracing.trace("Creating materialized view mutations from base table replica");
                    viewManager.forTable(upd.metadata().id).pushViewReplicaUpdates(upd, writeCommitLog, baseComplete);
                } catch (Throwable t) {
                    JVMStabilityInspector.inspectThrowable(t);
                    logger.error(String.format("Unknown exception caught while attempting to update MaterializedView! %s", upd.metadata().toString()), t);
                    throw t;
                }
            }
            Tracing.trace("Adding to {} memtable", upd.metadata().name);
            UpdateTransaction indexTransaction = updateIndexes ? cfs.indexManager.newUpdateTransaction(upd, opGroup, nowInSec) : UpdateTransaction.NO_OP;
            cfs.apply(upd, indexTransaction, opGroup, commitLogPosition);
            if (requiresViewUpdate)
                baseComplete.set(System.currentTimeMillis());
        }
        if (future != null) {
            future.complete(null);
        }
        return future;
    } finally {
        if (locks != null) {
            for (Lock lock : locks)
                if (lock != null)
                    lock.unlock();
        }
    }
}
Also used : TableId(org.apache.cassandra.schema.TableId) UpdateTransaction(org.apache.cassandra.index.transactions.UpdateTransaction) CommitLogPosition(org.apache.cassandra.db.commitlog.CommitLogPosition) Lock(java.util.concurrent.locks.Lock) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) AtomicLong(java.util.concurrent.atomic.AtomicLong) OpOrder(org.apache.cassandra.utils.concurrent.OpOrder) PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate)
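
For context, a minimal sketch of how a write reaches applyInternal. Callers go through the public Keyspace.apply overloads, which pass the flags documented above; the writeRow helper below is hypothetical, and the Mutation is assumed to be built elsewhere (for example with RowUpdateBuilder in tests).

import org.apache.cassandra.db.Keyspace;
import org.apache.cassandra.db.Mutation;

public class ApplyExample {

    // Hypothetical helper: apply a pre-built mutation through the public API,
    // which delegates to applyInternal with the corresponding flags.
    static void writeRow(Mutation mutation) {
        Keyspace ks = Keyspace.open(mutation.getKeyspaceName());
        // writeCommitLog = true: append to the commitlog before updating memtables and indexes
        ks.apply(mutation, true);
    }
}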

Example 12 with TableId

use of org.apache.cassandra.schema.TableId in project cassandra by apache.

the class CommitLogSegment method removeCleanFromDirty.

private void removeCleanFromDirty() {
    // if we're still allocating from this segment, don't touch anything since it can't be done thread-safely
    if (isStillAllocating())
        return;
    Iterator<Map.Entry<TableId, IntegerInterval.Set>> iter = tableClean.entrySet().iterator();
    while (iter.hasNext()) {
        Map.Entry<TableId, IntegerInterval.Set> clean = iter.next();
        TableId tableId = clean.getKey();
        IntegerInterval.Set cleanSet = clean.getValue();
        IntegerInterval dirtyInterval = tableDirty.get(tableId);
        if (dirtyInterval != null && cleanSet.covers(dirtyInterval)) {
            tableDirty.remove(tableId);
            iter.remove();
        }
    }
}
Also used : TableId(org.apache.cassandra.schema.TableId) IntegerInterval(org.apache.cassandra.utils.IntegerInterval) ConcurrentMap(java.util.concurrent.ConcurrentMap) NonBlockingHashMap(org.cliffc.high_scale_lib.NonBlockingHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)
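
The rule implemented above is: once the recorded clean set fully covers a table's dirty interval, both entries can be dropped. A self-contained sketch of the same pattern with plain int ranges in place of IntegerInterval; the DirtyTracker class and its fields are illustrative, not Cassandra API.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

class DirtyTracker {

    final Map<String, int[]> dirty = new HashMap<>(); // table name -> [start, end]
    final Map<String, int[]> clean = new HashMap<>();

    void removeCleanFromDirty() {
        Iterator<Map.Entry<String, int[]>> iter = clean.entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry<String, int[]> entry = iter.next();
            int[] d = dirty.get(entry.getKey());
            int[] c = entry.getValue();
            // "covers" here means the clean range fully contains the dirty range
            if (d != null && c[0] <= d[0] && c[1] >= d[1]) {
                dirty.remove(entry.getKey());
                iter.remove();
            }
        }
    }
}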

Example 13 with TableId

use of org.apache.cassandra.schema.TableId in project cassandra by apache.

the class IndexSummaryManager method getCompactingAndNonCompactingSSTables.

/**
     * Returns a Pair of all compacting and non-compacting sstables.  Non-compacting sstables will be marked as
     * compacting.
     */
@SuppressWarnings("resource")
private Pair<List<SSTableReader>, Map<TableId, LifecycleTransaction>> getCompactingAndNonCompactingSSTables() {
    List<SSTableReader> allCompacting = new ArrayList<>();
    Map<TableId, LifecycleTransaction> allNonCompacting = new HashMap<>();
    for (Keyspace ks : Keyspace.all()) {
        for (ColumnFamilyStore cfStore : ks.getColumnFamilyStores()) {
            Set<SSTableReader> nonCompacting, allSSTables;
            LifecycleTransaction txn = null;
            do {
                View view = cfStore.getTracker().getView();
                allSSTables = ImmutableSet.copyOf(view.select(SSTableSet.CANONICAL));
                nonCompacting = ImmutableSet.copyOf(view.getUncompacting(allSSTables));
            } while (null == (txn = cfStore.getTracker().tryModify(nonCompacting, OperationType.UNKNOWN)));
            allNonCompacting.put(cfStore.metadata.id, txn);
            allCompacting.addAll(Sets.difference(allSSTables, nonCompacting));
        }
    }
    return Pair.create(allCompacting, allNonCompacting);
}
Also used : TableId(org.apache.cassandra.schema.TableId) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Keyspace(org.apache.cassandra.db.Keyspace) LifecycleTransaction(org.apache.cassandra.db.lifecycle.LifecycleTransaction) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) View(org.apache.cassandra.db.lifecycle.View)
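
The do/while above is an optimistic retry loop: take a point-in-time snapshot of the view, derive the set of sstables to claim, then attempt the claim with tryModify, which returns null if another thread raced ahead. A minimal sketch of that idiom; the Tracker interface below is a stand-in for Cassandra's real Tracker, not its API.

import java.util.Set;

class OptimisticClaim<T> {

    interface Tracker<T> {
        Set<T> snapshot();                      // consistent point-in-time view
        Set<T> eligible(Set<T> all);            // derive candidates from that view
        AutoCloseable tryModify(Set<T> chosen); // atomic claim; null on contention
    }

    AutoCloseable claim(Tracker<T> tracker) {
        AutoCloseable txn;
        Set<T> chosen;
        do {
            Set<T> all = tracker.snapshot();
            chosen = tracker.eligible(all);
        } while ((txn = tracker.tryModify(chosen)) == null); // re-read and retry on contention
        return txn;
    }
}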

Example 14 with TableId

use of org.apache.cassandra.schema.TableId in project cassandra by apache.

the class RepairMessageVerbHandler method doVerb.

public void doVerb(final MessageIn<RepairMessage> message, final int id) {
    // TODO add cancel/interrupt message
    RepairJobDesc desc = message.payload.desc;
    try {
        switch(message.payload.messageType) {
            case PREPARE_MESSAGE:
                PrepareMessage prepareMessage = (PrepareMessage) message.payload;
                logger.debug("Preparing, {}", prepareMessage);
                List<ColumnFamilyStore> columnFamilyStores = new ArrayList<>(prepareMessage.tableIds.size());
                for (TableId tableId : prepareMessage.tableIds) {
                    ColumnFamilyStore columnFamilyStore = ColumnFamilyStore.getIfExists(tableId);
                    if (columnFamilyStore == null) {
                        logErrorAndSendFailureResponse(String.format("Table with id %s was dropped during prepare phase of repair", tableId), message.from, id);
                        return;
                    }
                    columnFamilyStores.add(columnFamilyStore);
                }
                ActiveRepairService.instance.registerParentRepairSession(prepareMessage.parentRepairSession, message.from, columnFamilyStores, prepareMessage.ranges, prepareMessage.isIncremental, prepareMessage.timestamp, prepareMessage.isGlobal);
                MessagingService.instance().sendReply(new MessageOut(MessagingService.Verb.INTERNAL_RESPONSE), id, message.from);
                break;
            case SNAPSHOT:
                logger.debug("Snapshotting {}", desc);
                final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(desc.keyspace, desc.columnFamily);
                if (cfs == null) {
                    logErrorAndSendFailureResponse(String.format("Table %s.%s was dropped during snapshot phase of repair", desc.keyspace, desc.columnFamily), message.from, id);
                    return;
                }
                ActiveRepairService.ParentRepairSession prs = ActiveRepairService.instance.getParentRepairSession(desc.parentSessionId);
                if (prs.isGlobal) {
                    prs.maybeSnapshot(cfs.metadata.id, desc.parentSessionId);
                } else {
                    cfs.snapshot(desc.sessionId.toString(), new Predicate<SSTableReader>() {

                        public boolean apply(SSTableReader sstable) {
                            // exclude SSTables that belong to secondary indexes (2i)
                            return sstable != null &&
                                   !sstable.metadata().isIndex() &&
                                   new Bounds<>(sstable.first.getToken(), sstable.last.getToken()).intersects(desc.ranges);
                        }
                    }, true, // ephemeral snapshot: if the repair fails, it is cleaned up at the next startup
                    false);
                }
                logger.debug("Enqueuing response to snapshot request {} to {}", desc.sessionId, message.from);
                MessagingService.instance().sendReply(new MessageOut(MessagingService.Verb.INTERNAL_RESPONSE), id, message.from);
                break;
            case VALIDATION_REQUEST:
                ValidationRequest validationRequest = (ValidationRequest) message.payload;
                logger.debug("Validating {}", validationRequest);
                // trigger read-only compaction
                ColumnFamilyStore store = ColumnFamilyStore.getIfExists(desc.keyspace, desc.columnFamily);
                if (store == null) {
                    logger.error("Table {}.{} was dropped during snapshot phase of repair", desc.keyspace, desc.columnFamily);
                    MessagingService.instance().sendOneWay(new ValidationComplete(desc).createMessage(), message.from);
                    return;
                }
                ActiveRepairService.instance.consistent.local.maybeSetRepairing(desc.parentSessionId);
                Validator validator = new Validator(desc, message.from, validationRequest.gcBefore, isConsistent(desc.parentSessionId));
                CompactionManager.instance.submitValidation(store, validator);
                break;
            case SYNC_REQUEST:
                // forwarded sync request
                SyncRequest request = (SyncRequest) message.payload;
                logger.debug("Syncing {}", request);
                long repairedAt = ActiveRepairService.UNREPAIRED_SSTABLE;
                if (desc.parentSessionId != null && ActiveRepairService.instance.getParentRepairSession(desc.parentSessionId) != null)
                    repairedAt = ActiveRepairService.instance.getParentRepairSession(desc.parentSessionId).getRepairedAt();
                StreamingRepairTask task = new StreamingRepairTask(desc, request, repairedAt, isConsistent(desc.parentSessionId));
                task.run();
                break;
            case CLEANUP:
                logger.debug("cleaning up repair");
                CleanupMessage cleanup = (CleanupMessage) message.payload;
                ActiveRepairService.instance.removeParentRepairSession(cleanup.parentRepairSession);
                MessagingService.instance().sendReply(new MessageOut(MessagingService.Verb.INTERNAL_RESPONSE), id, message.from);
                break;
            case CONSISTENT_REQUEST:
                ActiveRepairService.instance.consistent.local.handlePrepareMessage(message.from, (PrepareConsistentRequest) message.payload);
                break;
            case CONSISTENT_RESPONSE:
                ActiveRepairService.instance.consistent.coordinated.handlePrepareResponse((PrepareConsistentResponse) message.payload);
                break;
            case FINALIZE_PROPOSE:
                ActiveRepairService.instance.consistent.local.handleFinalizeProposeMessage(message.from, (FinalizePropose) message.payload);
                break;
            case FINALIZE_PROMISE:
                ActiveRepairService.instance.consistent.coordinated.handleFinalizePromiseMessage((FinalizePromise) message.payload);
                break;
            case FINALIZE_COMMIT:
                ActiveRepairService.instance.consistent.local.handleFinalizeCommitMessage(message.from, (FinalizeCommit) message.payload);
                break;
            case FAILED_SESSION:
                FailSession failure = (FailSession) message.payload;
                ActiveRepairService.instance.consistent.coordinated.handleFailSessionMessage(failure);
                ActiveRepairService.instance.consistent.local.handleFailSessionMessage(message.from, failure);
                break;
            case STATUS_REQUEST:
                ActiveRepairService.instance.consistent.local.handleStatusRequest(message.from, (StatusRequest) message.payload);
                break;
            case STATUS_RESPONSE:
                ActiveRepairService.instance.consistent.local.handleStatusResponse(message.from, (StatusResponse) message.payload);
                break;
            default:
                ActiveRepairService.instance.handleMessage(message.from, message.payload);
                break;
        }
    } catch (Exception e) {
        logger.error("Got error, removing parent repair session");
        if (desc != null && desc.parentSessionId != null)
            ActiveRepairService.instance.removeParentRepairSession(desc.parentSessionId);
        throw new RuntimeException(e);
    }
}
Also used : TableId(org.apache.cassandra.schema.TableId) ActiveRepairService(org.apache.cassandra.service.ActiveRepairService) MessageOut(org.apache.cassandra.net.MessageOut) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore)
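
Several branches above resolve a TableId back to a ColumnFamilyStore and fail fast if the table was dropped mid-repair. TableId itself is a thin wrapper around a UUID, which is why repair and streaming key their state on it rather than on keyspace/table names that can change. A small sketch of its value semantics; the method names match the Cassandra source these snippets come from, but verify them against your version.

import org.apache.cassandra.schema.TableId;

public class TableIdExample {

    static void tableIdBasics() {
        TableId fresh = TableId.generate();              // random id for a new table
        TableId same = TableId.fromUUID(fresh.asUUID()); // round-trip through the underlying UUID
        assert fresh.equals(same);                       // value-equal, so safe as a map key
    }
}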

Example 15 with TableId

use of org.apache.cassandra.schema.TableId in project cassandra by apache.

the class StreamSession method addTransferFiles.

public synchronized void addTransferFiles(Collection<SSTableStreamingSections> sstableDetails) {
    failIfFinished();
    Iterator<SSTableStreamingSections> iter = sstableDetails.iterator();
    while (iter.hasNext()) {
        SSTableStreamingSections details = iter.next();
        if (details.sections.isEmpty()) {
            // A reference was acquired on the sstable and we won't stream it
            details.ref.release();
            iter.remove();
            continue;
        }
        TableId tableId = details.ref.get().metadata().id;
        StreamTransferTask task = transfers.get(tableId);
        if (task == null) {
            // guarantee atomicity: putIfAbsent ensures only one task is ever registered per tableId
            StreamTransferTask newTask = new StreamTransferTask(this, tableId);
            task = transfers.putIfAbsent(tableId, newTask);
            if (task == null)
                task = newTask;
        }
        task.addTransferFile(details.ref, details.estimatedKeys, details.sections, details.repairedAt);
        iter.remove();
    }
}
Also used : TableId(org.apache.cassandra.schema.TableId)
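
The null-check followed by putIfAbsent above is the classic "first writer wins" idiom for a ConcurrentMap: build a candidate, try to publish it atomically, and adopt the winner if another thread got there first. A self-contained sketch with placeholder types (String standing in for TableId, StringBuilder for StreamTransferTask):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

class OneTaskPerKey {

    final ConcurrentMap<String, StringBuilder> tasks = new ConcurrentHashMap<>();

    StringBuilder taskFor(String key) {
        StringBuilder task = tasks.get(key);
        if (task == null) {
            StringBuilder candidate = new StringBuilder(key);
            task = tasks.putIfAbsent(key, candidate); // atomic: null means our candidate won
            if (task == null)
                task = candidate;
        }
        return task;
    }
}

On Java 8 and later, tasks.computeIfAbsent(key, k -> new StringBuilder(k)) expresses the same guarantee more compactly, at the cost of holding the map bin lock while the value is constructed.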

Aggregations

TableId (org.apache.cassandra.schema.TableId): 24
ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore): 5
Test (org.junit.Test): 5
TableMetadata (org.apache.cassandra.schema.TableMetadata): 4
InetAddress (java.net.InetAddress): 2
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2
ConcurrentMap (java.util.concurrent.ConcurrentMap): 2
Future (java.util.concurrent.Future): 2
Keyspace (org.apache.cassandra.db.Keyspace): 2
PartitionUpdate (org.apache.cassandra.db.partitions.PartitionUpdate): 2
SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader): 2
IntegerInterval (org.apache.cassandra.utils.IntegerInterval): 2
NonBlockingHashMap (org.cliffc.high_scale_lib.NonBlockingHashMap): 2
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 1
File (java.io.File): 1
ArrayList (java.util.ArrayList): 1
Properties (java.util.Properties): 1
ExecutionException (java.util.concurrent.ExecutionException): 1
ScheduledFuture (java.util.concurrent.ScheduledFuture): 1
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1