Example 31 with ACIDException

use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.

the class RecoveryManager method startRecoveryRedoPhase.

private synchronized void startRecoveryRedoPhase(Set<Integer> partitions, ILogReader logReader, long lowWaterMarkLSN, Set<Integer> winnerJobSet) throws IOException, ACIDException {
    int redoCount = 0;
    int jobId = -1;
    long resourceId;
    long maxDiskLastLsn;
    long lsn = -1;
    ILSMIndex index = null;
    LocalResource localResource = null;
    DatasetLocalResource localResourceMetadata = null;
    boolean foundWinner = false;
    JobEntityCommits jobEntityWinners = null;
    IAppRuntimeContextProvider appRuntimeContext = txnSubsystem.getAsterixAppRuntimeContextProvider();
    IDatasetLifecycleManager datasetLifecycleManager = appRuntimeContext.getDatasetLifecycleManager();
    Map<Long, LocalResource> resourcesMap = localResourceRepository.loadAndGetAllResources();
    Map<Long, Long> resourceId2MaxLSNMap = new HashMap<>();
    TxnId tempKeyTxnId = new TxnId(-1, -1, -1, null, -1, false);
    ILogRecord logRecord = null;
    try {
        logReader.initializeScan(lowWaterMarkLSN);
        logRecord = logReader.next();
        while (logRecord != null) {
            if (IS_DEBUG_MODE) {
                LOGGER.info(logRecord.getLogRecordForDisplay());
            }
            lsn = logRecord.getLSN();
            jobId = logRecord.getJobId();
            foundWinner = false;
            switch(logRecord.getLogType()) {
                case LogType.UPDATE:
                    if (partitions.contains(logRecord.getResourcePartition())) {
                        if (winnerJobSet.contains(jobId)) {
                            foundWinner = true;
                        } else if (jobId2WinnerEntitiesMap.containsKey(jobId)) {
                            jobEntityWinners = jobId2WinnerEntitiesMap.get(jobId);
                            tempKeyTxnId.setTxnId(jobId, logRecord.getDatasetId(), logRecord.getPKHashValue(), logRecord.getPKValue(), logRecord.getPKValueSize());
                            if (jobEntityWinners.containsEntityCommitForTxnId(lsn, tempKeyTxnId)) {
                                foundWinner = true;
                            }
                        }
                        if (foundWinner) {
                            resourceId = logRecord.getResourceId();
                            localResource = resourcesMap.get(resourceId);
                            /*******************************************************************
                                 * [Notice]
                                 * -> Issue
                                 * A dropped index can cause a problem during redo: an update
                                 * cannot be redone when the corresponding index no longer
                                 * exists on this NC because of an index drop DDL operation.
                                 * -> Approach
                                 * Detect the drop and skip the record. The drop shows up when
                                 * the lookup of the index's localResource returns null; in
                                 * that case, simply move on to the next log record.
                                 *******************************************************************/
                            if (localResource == null) {
                                LOGGER.log(Level.WARNING, "resource was not found for resource id " + resourceId);
                                logRecord = logReader.next();
                                continue;
                            }
                            /*******************************************************************/
                            //get index instance from IndexLifeCycleManager
                            //if index is not registered into IndexLifeCycleManager,
                            //create the index using LocalMetadata stored in LocalResourceRepository
                            //get partition path in this node
                            localResourceMetadata = (DatasetLocalResource) localResource.getResource();
                            index = (ILSMIndex) datasetLifecycleManager.get(localResource.getPath());
                            if (index == null) {
                                //#. create index instance and register to indexLifeCycleManager
                                index = (ILSMIndex) localResourceMetadata.createInstance(serviceCtx);
                                datasetLifecycleManager.register(localResource.getPath(), index);
                                datasetLifecycleManager.open(localResource.getPath());
                                //#. get maxDiskLastLSN
                                ILSMIndex lsmIndex = index;
                                try {
                                    maxDiskLastLsn = ((AbstractLSMIOOperationCallback) lsmIndex.getIOOperationCallback()).getComponentLSN(lsmIndex.getImmutableComponents());
                                } catch (HyracksDataException e) {
                                    datasetLifecycleManager.close(localResource.getPath());
                                    throw e;
                                }
                                //#. set resourceId and maxDiskLastLSN to the map
                                resourceId2MaxLSNMap.put(resourceId, maxDiskLastLsn);
                            } else {
                                maxDiskLastLsn = resourceId2MaxLSNMap.get(resourceId);
                            }
                            if (lsn > maxDiskLastLsn) {
                                redo(logRecord, datasetLifecycleManager);
                                redoCount++;
                            }
                        }
                    }
                    break;
                case LogType.JOB_COMMIT:
                case LogType.ENTITY_COMMIT:
                case LogType.ABORT:
                case LogType.FLUSH:
                case LogType.WAIT:
                case LogType.MARKER:
                    //do nothing
                    break;
                default:
                    throw new ACIDException("Unsupported LogType: " + logRecord.getLogType());
            }
            logRecord = logReader.next();
        }
        LOGGER.info("Logs REDO phase completed. Redo logs count: " + redoCount);
    } finally {
        //close all indexes
        Set<Long> resourceIdList = resourceId2MaxLSNMap.keySet();
        for (long r : resourceIdList) {
            datasetLifecycleManager.close(resourcesMap.get(r).getPath());
        }
    }
}
Also used : HashMap(java.util.HashMap) ILSMIndex(org.apache.hyracks.storage.am.lsm.common.api.ILSMIndex) Checkpoint(org.apache.asterix.common.transactions.Checkpoint) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) DatasetLocalResource(org.apache.asterix.common.dataflow.DatasetLocalResource) LocalResource(org.apache.hyracks.storage.common.LocalResource) ACIDException(org.apache.asterix.common.exceptions.ACIDException) IDatasetLifecycleManager(org.apache.asterix.common.api.IDatasetLifecycleManager) TxnId(org.apache.asterix.transaction.management.service.recovery.TxnId) IAppRuntimeContextProvider(org.apache.asterix.common.transactions.IAppRuntimeContextProvider) ILogRecord(org.apache.asterix.common.transactions.ILogRecord)
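
The core decision in this redo phase is whether a winner's UPDATE record is newer than what the index already has on disk. The standalone sketch below models just that rule with a per-resource LSN cache; the class and method names are hypothetical stand-ins, not AsterixDB API.

import java.util.HashMap;
import java.util.Map;

public class RedoFilterSketch {

    // mirrors resourceId2MaxLSNMap above: highest LSN already persisted per resource
    private final Map<Long, Long> resourceId2MaxLsn = new HashMap<>();

    void recordDiskLsn(long resourceId, long maxDiskLastLsn) {
        resourceId2MaxLsn.put(resourceId, maxDiskLastLsn);
    }

    // a record is redone only when its LSN is newer than everything on disk
    boolean shouldRedo(long resourceId, long logLsn) {
        Long maxDiskLastLsn = resourceId2MaxLsn.get(resourceId);
        return maxDiskLastLsn == null || logLsn > maxDiskLastLsn;
    }

    public static void main(String[] args) {
        RedoFilterSketch sketch = new RedoFilterSketch();
        sketch.recordDiskLsn(42L, 1000L);
        System.out.println(sketch.shouldRedo(42L, 999L));  // false: already durable
        System.out.println(sketch.shouldRedo(42L, 1001L)); // true: must be replayed
    }
}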

Example 32 with ACIDException

use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.

the class RecoveryManager method rollbackTransaction.

@Override
public void rollbackTransaction(ITransactionContext txnContext) throws ACIDException {
    int abortedJobId = txnContext.getJobId().getId();
    // Obtain the first/last log record LSNs written by the Job
    long firstLSN = txnContext.getFirstLSN();
    /**
         * The effect of any log record with LSN below minFirstLSN has already been written to disk and
         * will not be rolled back. Therefore, we will set the first LSN of the job to the maximum of
         * minFirstLSN and the job's first LSN.
         */
    try {
        long localMinFirstLSN = getLocalMinFirstLSN();
        firstLSN = Math.max(firstLSN, localMinFirstLSN);
    } catch (HyracksDataException e) {
        throw new ACIDException(e);
    }
    long lastLSN = txnContext.getLastLSN();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("rollbacking transaction log records from " + firstLSN + " to " + lastLSN);
    }
    // check if the transaction actually wrote some logs.
    if (firstLSN == TransactionManagementConstants.LogManagerConstants.TERMINAL_LSN || firstLSN > lastLSN) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("no need to roll back as there were no operations by the job " + txnContext.getJobId());
        }
        return;
    }
    // While reading log records from firstLsn to lastLsn, collect uncommitted txn's Lsns
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("collecting loser transaction's LSNs from " + firstLSN + " to " + lastLSN);
    }
    Map<TxnId, List<Long>> jobLoserEntity2LSNsMap = new HashMap<>();
    TxnId tempKeyTxnId = new TxnId(-1, -1, -1, null, -1, false);
    int updateLogCount = 0;
    int entityCommitLogCount = 0;
    int logJobId = -1;
    long currentLSN = -1;
    TxnId loserEntity = null;
    List<Long> undoLSNSet = null;
    //get active partitions on this node
    Set<Integer> activePartitions = localResourceRepository.getActivePartitions();
    ILogReader logReader = logMgr.getLogReader(false);
    try {
        logReader.initializeScan(firstLSN);
        ILogRecord logRecord = null;
        while (currentLSN < lastLSN) {
            logRecord = logReader.next();
            if (logRecord == null) {
                break;
            } else {
                currentLSN = logRecord.getLSN();
                if (IS_DEBUG_MODE) {
                    LOGGER.info(logRecord.getLogRecordForDisplay());
                }
            }
            logJobId = logRecord.getJobId();
            if (logJobId != abortedJobId) {
                continue;
            }
            tempKeyTxnId.setTxnId(logJobId, logRecord.getDatasetId(), logRecord.getPKHashValue(), logRecord.getPKValue(), logRecord.getPKValueSize());
            switch(logRecord.getLogType()) {
                case LogType.UPDATE:
                    if (activePartitions.contains(logRecord.getResourcePartition())) {
                        undoLSNSet = jobLoserEntity2LSNsMap.get(tempKeyTxnId);
                        if (undoLSNSet == null) {
                            loserEntity = new TxnId(logJobId, logRecord.getDatasetId(), logRecord.getPKHashValue(), logRecord.getPKValue(), logRecord.getPKValueSize(), true);
                            undoLSNSet = new LinkedList<>();
                            jobLoserEntity2LSNsMap.put(loserEntity, undoLSNSet);
                        }
                        undoLSNSet.add(currentLSN);
                        updateLogCount++;
                        if (IS_DEBUG_MODE) {
                            LOGGER.info(Thread.currentThread().getId() + "======> update[" + currentLSN + "]:" + tempKeyTxnId);
                        }
                    }
                    break;
                case LogType.ENTITY_COMMIT:
                    if (activePartitions.contains(logRecord.getResourcePartition())) {
                        jobLoserEntity2LSNsMap.remove(tempKeyTxnId);
                        entityCommitLogCount++;
                        if (IS_DEBUG_MODE) {
                            LOGGER.info(Thread.currentThread().getId() + "======> entity_commit[" + currentLSN + "]" + tempKeyTxnId);
                        }
                    }
                    break;
                case LogType.JOB_COMMIT:
                    throw new ACIDException("Unexpected LogType(" + logRecord.getLogType() + ") during abort.");
                case LogType.ABORT:
                case LogType.FLUSH:
                case LogType.WAIT:
                case LogType.MARKER:
                    //ignore
                    break;
                default:
                    throw new ACIDException("Unsupported LogType: " + logRecord.getLogType());
            }
        }
        if (currentLSN != lastLSN) {
            throw new ACIDException("LastLSN mismatch: lastLSN(" + lastLSN + ") vs currentLSN(" + currentLSN + ") during abort( " + txnContext.getJobId() + ")");
        }
        //undo loserTxn's effect
        LOGGER.log(Level.INFO, "undoing loser transaction's effect");
        IDatasetLifecycleManager datasetLifecycleManager = txnSubsystem.getAsterixAppRuntimeContextProvider().getDatasetLifecycleManager();
        //TODO sort loser entities by smallest LSN to undo in one pass.
        Iterator<Entry<TxnId, List<Long>>> iter = jobLoserEntity2LSNsMap.entrySet().iterator();
        int undoCount = 0;
        while (iter.hasNext()) {
            Map.Entry<TxnId, List<Long>> loserEntity2LSNsMap = iter.next();
            undoLSNSet = loserEntity2LSNsMap.getValue();
            // The step below is important since the upsert operations must be done in reverse order.
            Collections.reverse(undoLSNSet);
            for (long undoLSN : undoLSNSet) {
                //here, all the log records are UPDATE type. So, we don't need to check the type again.
                //read the corresponding log record to be undone.
                logRecord = logReader.read(undoLSN);
                if (logRecord == null) {
                    throw new ACIDException("IllegalState exception during abort( " + txnContext.getJobId() + ")");
                }
                if (IS_DEBUG_MODE) {
                    LOGGER.info(logRecord.getLogRecordForDisplay());
                }
                undo(logRecord, datasetLifecycleManager);
                undoCount++;
            }
        }
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("undone loser transaction's effect");
            LOGGER.info("[RecoveryManager's rollback log count] update/entityCommit/undo:" + updateLogCount + "/" + entityCommitLogCount + "/" + undoCount);
        }
    } finally {
        logReader.close();
    }
}
Also used : HashMap(java.util.HashMap) Checkpoint(org.apache.asterix.common.transactions.Checkpoint) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ILogReader(org.apache.asterix.common.transactions.ILogReader) ACIDException(org.apache.asterix.common.exceptions.ACIDException) IDatasetLifecycleManager(org.apache.asterix.common.api.IDatasetLifecycleManager) TxnId(org.apache.asterix.transaction.management.service.recovery.TxnId) Entry(java.util.Map.Entry) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ILogRecord(org.apache.asterix.common.transactions.ILogRecord) Map(java.util.Map)
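
Two details of this rollback are easy to miss: an ENTITY_COMMIT removes the entity from the loser map, and each surviving loser's LSNs are undone newest-first. The sketch below is a hypothetical condensed model of that bookkeeping (plain strings stand in for TxnId); it is not the real RecoveryManager API.

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class UndoOrderSketch {

    // mirrors jobLoserEntity2LSNsMap above, keyed by a simplified entity id
    private final Map<String, List<Long>> loserEntity2Lsns = new HashMap<>();

    void onUpdate(String entityKey, long lsn) {
        loserEntity2Lsns.computeIfAbsent(entityKey, k -> new ArrayList<>()).add(lsn);
    }

    void onEntityCommit(String entityKey) {
        // a committed entity is a winner, so its updates are never undone
        loserEntity2Lsns.remove(entityKey);
    }

    List<Long> undoOrder(String entityKey) {
        List<Long> lsns = new ArrayList<>(loserEntity2Lsns.getOrDefault(entityKey, new ArrayList<>()));
        Collections.reverse(lsns); // undo the newest update first
        return lsns;
    }

    public static void main(String[] args) {
        UndoOrderSketch sketch = new UndoOrderSketch();
        sketch.onUpdate("job1:ds1:pk7", 10L);
        sketch.onUpdate("job1:ds1:pk7", 20L);
        sketch.onUpdate("job1:ds1:pk9", 15L);
        sketch.onEntityCommit("job1:ds1:pk9");
        System.out.println(sketch.undoOrder("job1:ds1:pk7")); // [20, 10]
        System.out.println(sketch.undoOrder("job1:ds1:pk9")); // []
    }
}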

Example 33 with ACIDException

use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.

the class QueryTranslator method handleCreateFeedPolicyStatement.

protected void handleCreateFeedPolicyStatement(MetadataProvider metadataProvider, Statement stmt) throws AlgebricksException, HyracksDataException {
    String dataverse;
    String policy;
    FeedPolicyEntity newPolicy = null;
    MetadataTransactionContext mdTxnCtx = null;
    CreateFeedPolicyStatement cfps = (CreateFeedPolicyStatement) stmt;
    dataverse = getActiveDataverse(null);
    policy = cfps.getPolicyName();
    MetadataLockManager.INSTANCE.createFeedPolicyBegin(metadataProvider.getLocks(), dataverse, dataverse + "." + policy);
    try {
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        FeedPolicyEntity feedPolicy = MetadataManager.INSTANCE.getFeedPolicy(metadataProvider.getMetadataTxnContext(), dataverse, policy);
        if (feedPolicy != null) {
            if (cfps.getIfNotExists()) {
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                return;
            } else {
                throw new AlgebricksException("A policy with this name " + policy + " already exists.");
            }
        }
        boolean extendingExisting = cfps.getSourcePolicyName() != null;
        String description = cfps.getDescription() == null ? "" : cfps.getDescription();
        if (extendingExisting) {
            FeedPolicyEntity sourceFeedPolicy = MetadataManager.INSTANCE.getFeedPolicy(metadataProvider.getMetadataTxnContext(), dataverse, cfps.getSourcePolicyName());
            if (sourceFeedPolicy == null) {
                sourceFeedPolicy = MetadataManager.INSTANCE.getFeedPolicy(metadataProvider.getMetadataTxnContext(), MetadataConstants.METADATA_DATAVERSE_NAME, cfps.getSourcePolicyName());
                if (sourceFeedPolicy == null) {
                    throw new AlgebricksException("Unknown policy " + cfps.getSourcePolicyName());
                }
            }
            Map<String, String> policyProperties = sourceFeedPolicy.getProperties();
            policyProperties.putAll(cfps.getProperties());
            newPolicy = new FeedPolicyEntity(dataverse, policy, description, policyProperties);
        } else {
            Properties prop = new Properties();
            try (InputStream stream = new FileInputStream(cfps.getSourcePolicyFile())) {
                prop.load(stream);
            } catch (Exception e) {
                throw new AlgebricksException("Unable to read policy file " + cfps.getSourcePolicyFile(), e);
            }
            Map<String, String> policyProperties = new HashMap<>();
            for (Entry<Object, Object> entry : prop.entrySet()) {
                policyProperties.put((String) entry.getKey(), (String) entry.getValue());
            }
            newPolicy = new FeedPolicyEntity(dataverse, policy, description, policyProperties);
        }
        MetadataManager.INSTANCE.addFeedPolicy(mdTxnCtx, newPolicy);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (RemoteException | ACIDException e) {
        abort(e, e, mdTxnCtx);
        throw new HyracksDataException(e);
    } finally {
        metadataProvider.getLocks().unlock();
    }
}
Also used : HashMap(java.util.HashMap) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) Properties(java.util.Properties) ClusterProperties(org.apache.asterix.common.config.ClusterProperties) ExternalProperties(org.apache.asterix.common.config.ExternalProperties) ACIDException(org.apache.asterix.common.exceptions.ACIDException) MetadataException(org.apache.asterix.metadata.MetadataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) RemoteException(java.rmi.RemoteException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) CreateFeedPolicyStatement(org.apache.asterix.lang.common.statement.CreateFeedPolicyStatement) FeedPolicyEntity(org.apache.asterix.metadata.entities.FeedPolicyEntity) MutableObject(org.apache.commons.lang3.mutable.MutableObject)
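
The non-extending branch above loads a java.util.Properties file and copies its entries into a Map<String, String>. A minimal standalone version of that step, using only JDK APIs, is sketched below; the file name is a placeholder.

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class PolicyFileSketch {
    public static void main(String[] args) throws IOException {
        Properties prop = new Properties();
        // try-with-resources closes the stream even if load() throws
        try (InputStream stream = Files.newInputStream(Paths.get("policy.properties"))) {
            prop.load(stream);
        }
        // keys and values of a loaded properties file are always strings,
        // so the casts below are safe
        Map<String, String> policyProperties = new HashMap<>();
        for (Map.Entry<Object, Object> entry : prop.entrySet()) {
            policyProperties.put((String) entry.getKey(), (String) entry.getValue());
        }
        System.out.println(policyProperties);
    }
}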

Example 34 with ACIDException

use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.

the class RecoveryManager method startRecoverysAnalysisPhase.

private synchronized Set<Integer> startRecoverysAnalysisPhase(Set<Integer> partitions, ILogReader logReader, long lowWaterMarkLSN) throws IOException, ACIDException {
    int updateLogCount = 0;
    int entityCommitLogCount = 0;
    int jobCommitLogCount = 0;
    int abortLogCount = 0;
    Set<Integer> winnerJobSet = new HashSet<>();
    jobId2WinnerEntitiesMap = new HashMap<>();
    //set log reader to the lowWaterMarkLsn
    ILogRecord logRecord;
    logReader.initializeScan(lowWaterMarkLSN);
    logRecord = logReader.next();
    while (logRecord != null) {
        if (IS_DEBUG_MODE) {
            LOGGER.info(logRecord.getLogRecordForDisplay());
        }
        switch(logRecord.getLogType()) {
            case LogType.UPDATE:
                if (partitions.contains(logRecord.getResourcePartition())) {
                    updateLogCount++;
                }
                break;
            case LogType.JOB_COMMIT:
                winnerJobSet.add(logRecord.getJobId());
                cleanupJobCommits(logRecord.getJobId());
                jobCommitLogCount++;
                break;
            case LogType.ENTITY_COMMIT:
                if (partitions.contains(logRecord.getResourcePartition())) {
                    analyzeEntityCommitLog(logRecord);
                    entityCommitLogCount++;
                }
                break;
            case LogType.ABORT:
                abortLogCount++;
                break;
            case LogType.FLUSH:
            case LogType.WAIT:
            case LogType.MARKER:
                break;
            default:
                throw new ACIDException("Unsupported LogType: " + logRecord.getLogType());
        }
        logRecord = logReader.next();
    }
    //prepare winners for search after analysis is done to flush anything remaining in memory to disk.
    for (JobEntityCommits winners : jobId2WinnerEntitiesMap.values()) {
        winners.prepareForSearch();
    }
    LOGGER.info("Logs analysis phase completed.");
    LOGGER.info("Analysis log count update/entityCommit/jobCommit/abort = " + updateLogCount + "/" + entityCommitLogCount + "/" + jobCommitLogCount + "/" + abortLogCount);
    return winnerJobSet;
}
Also used : ILogRecord(org.apache.asterix.common.transactions.ILogRecord) Checkpoint(org.apache.asterix.common.transactions.Checkpoint) HashSet(java.util.HashSet) ACIDException(org.apache.asterix.common.exceptions.ACIDException)
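
The analysis pass classifies jobs: a JOB_COMMIT record makes a job a winner, while jobs that only left UPDATE records remain losers and their effects will not be redone. The sketch below is a hypothetical reduction of that classification to its core sets; it is not the RecoveryManager API.

import java.util.HashSet;
import java.util.Set;

public class AnalysisPhaseSketch {

    private final Set<Integer> jobsWithUpdates = new HashSet<>();
    private final Set<Integer> winnerJobs = new HashSet<>(); // mirrors winnerJobSet above

    void onUpdate(int jobId) {
        jobsWithUpdates.add(jobId);
    }

    void onJobCommit(int jobId) {
        winnerJobs.add(jobId);
    }

    Set<Integer> loserJobs() {
        // jobs that wrote updates but never committed
        Set<Integer> losers = new HashSet<>(jobsWithUpdates);
        losers.removeAll(winnerJobs);
        return losers;
    }

    public static void main(String[] args) {
        AnalysisPhaseSketch sketch = new AnalysisPhaseSketch();
        sketch.onUpdate(1);
        sketch.onUpdate(2);
        sketch.onJobCommit(1);                  // job 1 committed: winner
        System.out.println(sketch.loserJobs()); // [2]
    }
}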

Example 35 with ACIDException

use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.

the class TakeoverPartitionsRequestMessage method handle.

@Override
public void handle(INcApplicationContext appContext) throws HyracksDataException, InterruptedException {
    INCMessageBroker broker = (INCMessageBroker) appContext.getServiceContext().getMessageBroker();
    //if the NC is shutting down, it should ignore the takeover partitions request
    if (!appContext.isShuttingdown()) {
        HyracksDataException hde = null;
        try {
            IRemoteRecoveryManager remoteRecoeryManager = appContext.getRemoteRecoveryManager();
            remoteRecoeryManager.takeoverPartitons(partitions);
        } catch (IOException | ACIDException e) {
            LOGGER.log(Level.SEVERE, "Failure taking over partitions", e);
            hde = HyracksDataException.suppress(hde, e);
        } finally {
            //send response after takeover is completed
            TakeoverPartitionsResponseMessage reponse = new TakeoverPartitionsResponseMessage(requestId, appContext.getTransactionSubsystem().getId(), partitions);
            try {
                broker.sendMessageToCC(reponse);
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, "Failure taking over partitions", e);
                hde = HyracksDataException.suppress(hde, e);
            }
        }
        if (hde != null) {
            throw hde;
        }
    }
}
Also used : IOException(java.io.IOException) INCMessageBroker(org.apache.asterix.common.messaging.api.INCMessageBroker) IRemoteRecoveryManager(org.apache.asterix.common.replication.IRemoteRecoveryManager) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ACIDException(org.apache.asterix.common.exceptions.ACIDException)
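
The handler above keeps going after a failure and folds every exception into a single HyracksDataException via suppress(), so the response is always sent and no error is lost. The standalone sketch below shows the same accumulate-and-rethrow idiom with plain JDK suppressed exceptions; it illustrates the pattern, not the Hyracks helper itself.

public class SuppressSketch {

    static void step(int i) throws Exception {
        throw new Exception("step " + i + " failed");
    }

    public static void main(String[] args) {
        Exception accumulated = null;
        for (int i = 0; i < 3; i++) {
            try {
                step(i); // keep executing remaining steps even after a failure
            } catch (Exception e) {
                if (accumulated == null) {
                    accumulated = e;              // first failure becomes the root
                } else {
                    accumulated.addSuppressed(e); // later failures are attached to it
                }
            }
        }
        if (accumulated != null) {
            System.out.println(accumulated.getMessage());
            for (Throwable t : accumulated.getSuppressed()) {
                System.out.println("suppressed: " + t.getMessage());
            }
        }
    }
}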

Aggregations

ACIDException (org.apache.asterix.common.exceptions.ACIDException)70 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)54 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)31 ITransactionContext (org.apache.asterix.common.transactions.ITransactionContext)14 IOException (java.io.IOException)9 ILSMIndex (org.apache.hyracks.storage.am.lsm.common.api.ILSMIndex)9 DatasetLocalResource (org.apache.asterix.common.dataflow.DatasetLocalResource)6 DatasetId (org.apache.asterix.common.transactions.DatasetId)5 ITransactionSubsystem (org.apache.asterix.common.transactions.ITransactionSubsystem)5 LogRecord (org.apache.asterix.common.transactions.LogRecord)5 RemoteException (java.rmi.RemoteException)4 Checkpoint (org.apache.asterix.common.transactions.Checkpoint)4 ILogRecord (org.apache.asterix.common.transactions.ILogRecord)4 IModificationOperationCallback (org.apache.hyracks.storage.common.IModificationOperationCallback)4 HashMap (java.util.HashMap)3 IDatasetLifecycleManager (org.apache.asterix.common.api.IDatasetLifecycleManager)3 INcApplicationContext (org.apache.asterix.common.api.INcApplicationContext)3 ITransactionManager (org.apache.asterix.common.transactions.ITransactionManager)3 MetadataTransactionContext (org.apache.asterix.metadata.MetadataTransactionContext)3 IIndex (org.apache.hyracks.storage.common.IIndex)3