use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.
the class RecoveryManager method startRecoveryRedoPhase.
/**
 * REDO phase of recovery: scans the log starting at {@code lowWaterMarkLSN} and re-applies
 * UPDATE log records that belong to committed ("winner") work and that carry an LSN greater
 * than the maximum LSN recorded for the target index's immutable components.
 *
 * An UPDATE record is considered a winner when its job id is in {@code winnerJobSet}, or when
 * {@code jobId2WinnerEntitiesMap} holds an entity commit for the record's transaction id.
 * Only records whose resource partition is contained in {@code partitions} are examined.
 * Every index that this method registers and opens is closed again in the {@code finally} block.
 *
 * @param partitions      partitions whose UPDATE records should be considered
 * @param logReader       reader used to scan the log; positioned at {@code lowWaterMarkLSN} here
 * @param lowWaterMarkLSN LSN at which the scan starts
 * @param winnerJobSet    ids of jobs whose UPDATE records are redone unconditionally
 * @throws IOException   propagated from the underlying log/index operations
 * @throws ACIDException if a log record with an unsupported type is encountered
 */
private synchronized void startRecoveryRedoPhase(Set<Integer> partitions, ILogReader logReader, long lowWaterMarkLSN, Set<Integer> winnerJobSet) throws IOException, ACIDException {
int redoCount = 0;
int jobId = -1;
long resourceId;
long maxDiskLastLsn;
long lsn = -1;
ILSMIndex index = null;
LocalResource localResource = null;
DatasetLocalResource localResourceMetadata = null;
boolean foundWinner = false;
JobEntityCommits jobEntityWinners = null;
IAppRuntimeContextProvider appRuntimeContext = txnSubsystem.getAsterixAppRuntimeContextProvider();
IDatasetLifecycleManager datasetLifecycleManager = appRuntimeContext.getDatasetLifecycleManager();
Map<Long, LocalResource> resourcesMap = localResourceRepository.loadAndGetAllResources();
// Tracks, per resource id, the max component LSN of every index opened by this method;
// its key set doubles as the list of indexes to close in the finally block.
Map<Long, Long> resourceId2MaxLSNMap = new HashMap<>();
// Reusable lookup key: it is re-populated for each record instead of allocating a new TxnId.
TxnId tempKeyTxnId = new TxnId(-1, -1, -1, null, -1, false);
ILogRecord logRecord = null;
try {
logReader.initializeScan(lowWaterMarkLSN);
logRecord = logReader.next();
while (logRecord != null) {
if (IS_DEBUG_MODE) {
LOGGER.info(logRecord.getLogRecordForDisplay());
}
lsn = logRecord.getLSN();
jobId = logRecord.getJobId();
foundWinner = false;
switch(logRecord.getLogType()) {
case LogType.UPDATE:
if (partitions.contains(logRecord.getResourcePartition())) {
// Winner check: whole-job commit first, then a per-entity commit lookup.
if (winnerJobSet.contains(jobId)) {
foundWinner = true;
} else if (jobId2WinnerEntitiesMap.containsKey(jobId)) {
jobEntityWinners = jobId2WinnerEntitiesMap.get(jobId);
tempKeyTxnId.setTxnId(jobId, logRecord.getDatasetId(), logRecord.getPKHashValue(), logRecord.getPKValue(), logRecord.getPKValueSize());
if (jobEntityWinners.containsEntityCommitForTxnId(lsn, tempKeyTxnId)) {
foundWinner = true;
}
}
if (foundWinner) {
resourceId = logRecord.getResourceId();
localResource = resourcesMap.get(resourceId);
/*******************************************************************
* [Notice]
* -> Issue
* Delete index may cause a problem during redo.
* The index operation to be redone couldn't be redone because the corresponding index
* may not exist in NC due to the possible index drop DDL operation.
* -> Approach
* Avoid the problem during redo.
* More specifically, the problem will be detected when the localResource of
* the corresponding index is retrieved, which will end up with 'null'.
* If null is returned, then just go and process the next
* log record.
*******************************************************************/
if (localResource == null) {
LOGGER.log(Level.WARNING, "resource was not found for resource id " + resourceId);
// Advance the reader before 'continue': the advance at the bottom of the loop is skipped.
logRecord = logReader.next();
continue;
}
/*******************************************************************/
//get index instance from IndexLifeCycleManager
//if index is not registered into IndexLifeCycleManager,
//create the index using LocalMetadata stored in LocalResourceRepository
//get partition path in this node
localResourceMetadata = (DatasetLocalResource) localResource.getResource();
index = (ILSMIndex) datasetLifecycleManager.get(localResource.getPath());
if (index == null) {
//#. create index instance and register to indexLifeCycleManager
index = (ILSMIndex) localResourceMetadata.createInstance(serviceCtx);
datasetLifecycleManager.register(localResource.getPath(), index);
datasetLifecycleManager.open(localResource.getPath());
//#. get maxDiskLastLSN
ILSMIndex lsmIndex = index;
try {
maxDiskLastLsn = ((AbstractLSMIOOperationCallback) lsmIndex.getIOOperationCallback()).getComponentLSN(lsmIndex.getImmutableComponents());
} catch (HyracksDataException e) {
// The resource is not yet in resourceId2MaxLSNMap, so the finally block would not
// close it; close it here before propagating the failure.
datasetLifecycleManager.close(localResource.getPath());
throw e;
}
//#. set resourceId and maxDiskLastLSN to the map
resourceId2MaxLSNMap.put(resourceId, maxDiskLastLsn);
} else {
// NOTE(review): the map value is auto-unboxed to long here. If an index is already known to
// the lifecycle manager but was not recorded in resourceId2MaxLSNMap by this method, get()
// returns null and this line throws NullPointerException — confirm that cannot happen.
maxDiskLastLsn = resourceId2MaxLSNMap.get(resourceId);
}
// Redo only updates whose LSN is greater than the max LSN obtained for the index's
// immutable components.
if (lsn > maxDiskLastLsn) {
redo(logRecord, datasetLifecycleManager);
redoCount++;
}
}
}
break;
case LogType.JOB_COMMIT:
case LogType.ENTITY_COMMIT:
case LogType.ABORT:
case LogType.FLUSH:
case LogType.WAIT:
case LogType.MARKER:
//do nothing
break;
default:
throw new ACIDException("Unsupported LogType: " + logRecord.getLogType());
}
logRecord = logReader.next();
}
LOGGER.info("Logs REDO phase completed. Redo logs count: " + redoCount);
} finally {
//close all indexes
Set<Long> resourceIdList = resourceId2MaxLSNMap.keySet();
for (long r : resourceIdList) {
datasetLifecycleManager.close(resourcesMap.get(r).getPath());
}
}
}
use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.
the class RecoveryManager method rollbackTransaction.
/**
 * Rolls back an aborted job by scanning its log range, collecting the UPDATE records of
 * entities that were not entity-committed, and undoing those records in reverse LSN order.
 *
 * @param txnContext context of the job being rolled back; supplies the job id and the
 *                   first/last LSNs written by the job
 * @throws ACIDException if the local minimum first LSN cannot be obtained, a JOB_COMMIT or an
 *                       unsupported log type is found for the job, the scan does not end exactly
 *                       at the job's last LSN, or a log record to undo cannot be read
 */
@Override
public void rollbackTransaction(ITransactionContext txnContext) throws ACIDException {
    final int abortedJobId = txnContext.getJobId().getId();

    // The effect of any log record with an LSN below the local minimum first LSN has already been
    // written to disk and will not be rolled back, so the scan starts at the larger of the two.
    long firstLSN = txnContext.getFirstLSN();
    try {
        firstLSN = Math.max(firstLSN, getLocalMinFirstLSN());
    } catch (HyracksDataException e) {
        throw new ACIDException(e);
    }
    final long lastLSN = txnContext.getLastLSN();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("rollbacking transaction log records from " + firstLSN + " to " + lastLSN);
    }

    // Guard: the job did not write any log record in the range to be rolled back.
    final boolean nothingToRollBack =
            firstLSN == TransactionManagementConstants.LogManagerConstants.TERMINAL_LSN || firstLSN > lastLSN;
    if (nothingToRollBack) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("no need to roll back as there were no operations by the job " + txnContext.getJobId());
        }
        return;
    }

    // Phase 1: read firstLSN..lastLSN and gather the LSNs of uncommitted entity updates.
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("collecting loser transaction's LSNs from " + firstLSN + " to " + lastLSN);
    }
    final Map<TxnId, List<Long>> loserEntityLsns = new HashMap<>();
    // Reusable lookup key; a fresh TxnId is only allocated when a new map entry is inserted.
    final TxnId lookupKey = new TxnId(-1, -1, -1, null, -1, false);
    int updateLogCount = 0;
    int entityCommitLogCount = 0;
    long currentLSN = -1;
    // Only updates on partitions active on this node are considered.
    final Set<Integer> activePartitions = localResourceRepository.getActivePartitions();
    final ILogReader logReader = logMgr.getLogReader(false);
    try {
        logReader.initializeScan(firstLSN);
        while (currentLSN < lastLSN) {
            final ILogRecord scanned = logReader.next();
            if (scanned == null) {
                break;
            }
            currentLSN = scanned.getLSN();
            if (IS_DEBUG_MODE) {
                LOGGER.info(scanned.getLogRecordForDisplay());
            }
            final int recordJobId = scanned.getJobId();
            if (recordJobId != abortedJobId) {
                // Record belongs to another job.
                continue;
            }
            lookupKey.setTxnId(recordJobId, scanned.getDatasetId(), scanned.getPKHashValue(), scanned.getPKValue(), scanned.getPKValueSize());
            switch (scanned.getLogType()) {
                case LogType.UPDATE: {
                    if (!activePartitions.contains(scanned.getResourcePartition())) {
                        break;
                    }
                    List<Long> entityLsns = loserEntityLsns.get(lookupKey);
                    if (entityLsns == null) {
                        TxnId loserEntity = new TxnId(recordJobId, scanned.getDatasetId(), scanned.getPKHashValue(), scanned.getPKValue(), scanned.getPKValueSize(), true);
                        entityLsns = new LinkedList<>();
                        loserEntityLsns.put(loserEntity, entityLsns);
                    }
                    entityLsns.add(currentLSN);
                    updateLogCount++;
                    if (IS_DEBUG_MODE) {
                        LOGGER.info(Thread.currentThread().getId() + "======> update[" + currentLSN + "]:" + lookupKey);
                    }
                    break;
                }
                case LogType.ENTITY_COMMIT: {
                    if (!activePartitions.contains(scanned.getResourcePartition())) {
                        break;
                    }
                    // The entity committed: its earlier updates must not be undone.
                    loserEntityLsns.remove(lookupKey);
                    entityCommitLogCount++;
                    if (IS_DEBUG_MODE) {
                        LOGGER.info(Thread.currentThread().getId() + "======> entity_commit[" + currentLSN + "]" + lookupKey);
                    }
                    break;
                }
                case LogType.JOB_COMMIT:
                    throw new ACIDException("Unexpected LogType(" + scanned.getLogType() + ") during abort.");
                case LogType.ABORT:
                case LogType.FLUSH:
                case LogType.WAIT:
                case LogType.MARKER:
                    // Nothing to collect for these record types.
                    break;
                default:
                    throw new ACIDException("Unsupported LogType: " + scanned.getLogType());
            }
        }
        if (currentLSN != lastLSN) {
            throw new ACIDException("LastLSN mismatch: lastLSN(" + lastLSN + ") vs currentLSN(" + currentLSN + ") during abort( " + txnContext.getJobId() + ")");
        }

        // Phase 2: undo the collected updates.
        LOGGER.log(Level.INFO, "undoing loser transaction's effect");
        final IDatasetLifecycleManager datasetLifecycleManager = txnSubsystem.getAsterixAppRuntimeContextProvider().getDatasetLifecycleManager();
        //TODO sort loser entities by smallest LSN to undo in one pass.
        int undoCount = 0;
        for (List<Long> entityLsns : loserEntityLsns.values()) {
            // Upserts must be undone newest-first, so walk each entity's LSNs in reverse.
            Collections.reverse(entityLsns);
            for (long undoLSN : entityLsns) {
                // Every collected LSN refers to an UPDATE record; no type check is needed.
                final ILogRecord toUndo = logReader.read(undoLSN);
                if (toUndo == null) {
                    throw new ACIDException("IllegalState exception during abort( " + txnContext.getJobId() + ")");
                }
                if (IS_DEBUG_MODE) {
                    LOGGER.info(toUndo.getLogRecordForDisplay());
                }
                undo(toUndo, datasetLifecycleManager);
                undoCount++;
            }
        }
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("undone loser transaction's effect");
            LOGGER.info("[RecoveryManager's rollback log count] update/entityCommit/undo:" + updateLogCount + "/" + entityCommitLogCount + "/" + undoCount);
        }
    } finally {
        logReader.close();
    }
}
use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.
the class QueryTranslator method handleCreateFeedPolicyStatement.
/**
 * Handles a CREATE FEED POLICY statement inside a metadata transaction.
 *
 * If a policy with the requested name already exists in the active dataverse, the call either
 * commits and returns (when the statement carries IF NOT EXISTS) or fails. Otherwise the new
 * policy's properties are built in one of two ways:
 * <ul>
 * <li>from an existing source policy (looked up in the active dataverse first, then in the
 * metadata dataverse), overlaid with the properties given in the statement; or</li>
 * <li>from a properties file named by the statement.</li>
 * </ul>
 * The metadata locks acquired at the start are released in the {@code finally} block.
 *
 * @param metadataProvider provider that receives the metadata transaction context and holds the locks
 * @param stmt             the statement; must be a {@link CreateFeedPolicyStatement}
 * @throws AlgebricksException  if the policy already exists (without IF NOT EXISTS), the source
 *                              policy is unknown, or the policy file cannot be read
 * @throws HyracksDataException wrapping a {@code RemoteException} or {@code ACIDException} raised
 *                              by the metadata operations, after the transaction was aborted
 */
protected void handleCreateFeedPolicyStatement(MetadataProvider metadataProvider, Statement stmt) throws AlgebricksException, HyracksDataException {
    String dataverse;
    String policy;
    FeedPolicyEntity newPolicy = null;
    MetadataTransactionContext mdTxnCtx = null;
    CreateFeedPolicyStatement cfps = (CreateFeedPolicyStatement) stmt;
    dataverse = getActiveDataverse(null);
    policy = cfps.getPolicyName();
    MetadataLockManager.INSTANCE.createFeedPolicyBegin(metadataProvider.getLocks(), dataverse, dataverse + "." + policy);
    try {
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        FeedPolicyEntity feedPolicy = MetadataManager.INSTANCE.getFeedPolicy(metadataProvider.getMetadataTxnContext(), dataverse, policy);
        if (feedPolicy != null) {
            if (cfps.getIfNotExists()) {
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                return;
            } else {
                throw new AlgebricksException("A policy with this name " + policy + " already exists.");
            }
        }
        boolean extendingExisting = cfps.getSourcePolicyName() != null;
        String description = cfps.getDescription() == null ? "" : cfps.getDescription();
        if (extendingExisting) {
            FeedPolicyEntity sourceFeedPolicy = MetadataManager.INSTANCE.getFeedPolicy(metadataProvider.getMetadataTxnContext(), dataverse, cfps.getSourcePolicyName());
            if (sourceFeedPolicy == null) {
                // Fall back to the policies defined in the metadata dataverse.
                sourceFeedPolicy = MetadataManager.INSTANCE.getFeedPolicy(metadataProvider.getMetadataTxnContext(), MetadataConstants.METADATA_DATAVERSE_NAME, cfps.getSourcePolicyName());
                if (sourceFeedPolicy == null) {
                    throw new AlgebricksException("Unknown policy " + cfps.getSourcePolicyName());
                }
            }
            // Copy the source policy's properties before overlaying the statement's properties;
            // calling putAll on the map returned by getProperties() would modify the source
            // policy entity itself.
            Map<String, String> policyProperties = new HashMap<>(sourceFeedPolicy.getProperties());
            policyProperties.putAll(cfps.getProperties());
            newPolicy = new FeedPolicyEntity(dataverse, policy, description, policyProperties);
        } else {
            Properties prop = new Properties();
            // try-with-resources guarantees the file handle is released even if load() fails.
            try (InputStream stream = new FileInputStream(cfps.getSourcePolicyFile())) {
                prop.load(stream);
            } catch (Exception e) {
                throw new AlgebricksException("Unable to read policy file" + cfps.getSourcePolicyFile(), e);
            }
            Map<String, String> policyProperties = new HashMap<>();
            for (Entry<Object, Object> entry : prop.entrySet()) {
                policyProperties.put((String) entry.getKey(), (String) entry.getValue());
            }
            newPolicy = new FeedPolicyEntity(dataverse, policy, description, policyProperties);
        }
        MetadataManager.INSTANCE.addFeedPolicy(mdTxnCtx, newPolicy);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (RemoteException | ACIDException e) {
        // NOTE(review): an AlgebricksException thrown inside the try block is not caught here, so
        // abort(...) is not invoked for it — confirm the metadata transaction is cleaned up elsewhere.
        abort(e, e, mdTxnCtx);
        throw new HyracksDataException(e);
    } finally {
        metadataProvider.getLocks().unlock();
    }
}
use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.
the class RecoveryManager method startRecoverysAnalysisPhase.
/**
 * Analysis phase of recovery: scans the log from {@code lowWaterMarkLSN}, collects the ids of
 * jobs that have a JOB_COMMIT record, and feeds ENTITY_COMMIT records of the given partitions
 * to {@code analyzeEntityCommitLog}. Resets {@code jobId2WinnerEntitiesMap} before scanning and
 * calls {@code prepareForSearch()} on each of its values once the scan is finished.
 *
 * @param partitions      partitions whose UPDATE and ENTITY_COMMIT records are counted/analyzed
 * @param logReader       reader used to scan the log
 * @param lowWaterMarkLSN LSN at which the scan starts
 * @return the set of job ids for which a JOB_COMMIT record was seen
 * @throws IOException   propagated from the underlying log operations
 * @throws ACIDException if a log record with an unsupported type is encountered
 */
private synchronized Set<Integer> startRecoverysAnalysisPhase(Set<Integer> partitions, ILogReader logReader, long lowWaterMarkLSN) throws IOException, ACIDException {
    // Per-type counters, reported once at the end of the phase.
    int updates = 0;
    int entityCommits = 0;
    int jobCommits = 0;
    int aborts = 0;
    final Set<Integer> winnerJobSet = new HashSet<>();
    jobId2WinnerEntitiesMap = new HashMap<>();

    // Position the reader at the low water mark and walk the log until it is exhausted.
    logReader.initializeScan(lowWaterMarkLSN);
    for (ILogRecord current = logReader.next(); current != null; current = logReader.next()) {
        if (IS_DEBUG_MODE) {
            LOGGER.info(current.getLogRecordForDisplay());
        }
        switch (current.getLogType()) {
            case LogType.UPDATE:
                if (partitions.contains(current.getResourcePartition())) {
                    updates++;
                }
                break;
            case LogType.JOB_COMMIT: {
                // The whole job is a winner; per-entity bookkeeping for it is cleaned up.
                final int committedJobId = current.getJobId();
                winnerJobSet.add(committedJobId);
                cleanupJobCommits(committedJobId);
                jobCommits++;
                break;
            }
            case LogType.ENTITY_COMMIT:
                if (partitions.contains(current.getResourcePartition())) {
                    analyzeEntityCommitLog(current);
                    entityCommits++;
                }
                break;
            case LogType.ABORT:
                aborts++;
                break;
            case LogType.FLUSH:
            case LogType.WAIT:
            case LogType.MARKER:
                // Not relevant to the analysis.
                break;
            default:
                throw new ACIDException("Unsupported LogType: " + current.getLogType());
        }
    }

    //prepare winners for search after analysis is done to flush anything remaining in memory to disk.
    for (JobEntityCommits entityCommitsOfJob : jobId2WinnerEntitiesMap.values()) {
        entityCommitsOfJob.prepareForSearch();
    }
    LOGGER.info("Logs analysis phase completed.");
    LOGGER.info("Analysis log count update/entityCommit/jobCommit/abort = " + updates + "/" + entityCommits + "/" + jobCommits + "/" + aborts);
    return winnerJobSet;
}
use of org.apache.asterix.common.exceptions.ACIDException in project asterixdb by apache.
the class TakeoverPartitionsRequestMessage method handle.
/**
 * Handles a takeover-partitions request on a node controller: asks the remote recovery manager
 * to take over {@code partitions} and then always sends a
 * {@code TakeoverPartitionsResponseMessage} to the cluster controller, whether or not the
 * takeover succeeded. The request is ignored when the node is shutting down.
 *
 * @param appContext application context of the node controller handling the message
 * @throws HyracksDataException if the takeover or sending the response failed; when both fail,
 *                              the failures are combined via {@code HyracksDataException.suppress}
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void handle(INcApplicationContext appContext) throws HyracksDataException, InterruptedException {
    INCMessageBroker broker = (INCMessageBroker) appContext.getServiceContext().getMessageBroker();
    // A node that is shutting down ignores takeover requests.
    if (appContext.isShuttingdown()) {
        return;
    }
    HyracksDataException failure = null;
    try {
        IRemoteRecoveryManager recoveryManager = appContext.getRemoteRecoveryManager();
        recoveryManager.takeoverPartitons(partitions);
    } catch (IOException | ACIDException e) {
        LOGGER.log(Level.SEVERE, "Failure taking over partitions", e);
        failure = HyracksDataException.suppress(failure, e);
    } finally {
        // The response is sent once the takeover attempt is over, even if it failed.
        TakeoverPartitionsResponseMessage response = new TakeoverPartitionsResponseMessage(requestId, appContext.getTransactionSubsystem().getId(), partitions);
        try {
            broker.sendMessageToCC(response);
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failure taking over partitions", e);
            failure = HyracksDataException.suppress(failure, e);
        }
    }
    if (failure != null) {
        throw failure;
    }
}
Aggregations