Usage example of org.apache.asterix.replication.storage.ReplicaResourcesManager in the Apache AsterixDB project: class RemoteRecoveryManager, method completeFailbackProcess.
/**
 * Completes the failback process: for every remote replica selected for failback,
 * requests the index files that are still missing locally, then renews the log
 * files past the replicas' maximum LSN and restarts the replication services.
 *
 * @throws IOException          if communicating with a remote replica fails outside
 *                              the guarded file-request phase
 * @throws InterruptedException if the thread is interrupted while waiting on recovery
 */
@Override
public void completeFailbackProcess() throws IOException, InterruptedException {
    ILogManager logManager = runtimeContext.getTransactionSubsystem().getLogManager();
    ReplicaResourcesManager replicaResourcesManager =
            (ReplicaResourcesManager) runtimeContext.getReplicaResourcesManager();
    Map<String, ClusterPartition[]> nodePartitions =
            runtimeContext.getMetadataProperties().getNodePartitions();
    /*
     * For each lost partition, get the remaining files from replicas
     * to complete the failback process.
     */
    try {
        for (Entry<String, Set<String>> remoteReplica : failbackRecoveryReplicas.entrySet()) {
            String replicaId = remoteReplica.getKey();
            // NCs whose data will be recovered from this replica
            Set<String> nodesToRecoverFrom = remoteReplica.getValue();
            Set<String> existingFiles = new HashSet<>();
            Set<Integer> partitionsToRecover = new HashSet<>();
            for (String nodeId : nodesToRecoverFrom) {
                // get partitions that will be recovered from this node
                ClusterPartition[] replicaPartitions = nodePartitions.get(nodeId);
                for (ClusterPartition partition : replicaPartitions) {
                    existingFiles.addAll(
                            replicaResourcesManager.getPartitionIndexesFiles(partition.getPartitionId(), true));
                    partitionsToRecover.add(partition.getPartitionId());
                }
            }
            // request only the index files not already present locally
            replicationManager.requestReplicaFiles(replicaId, partitionsToRecover, existingFiles);
        }
    } catch (IOException e) {
        /*
         * In case of failure during failback completion we need to construct a new plan
         * and get all the files from the start, since the remote replicas will change
         * in the new plan.
         */
        if (LOGGER.isLoggable(Level.WARNING)) {
            LOGGER.log(Level.WARNING, "Failed during completing failback. Restarting failback process...", e);
        }
        startFailbackProcess();
        // NOTE(review): control falls through to the steps below even after restarting
        // the failback process — confirm this is intentional and that
        // failbackRecoveryReplicas is still valid at this point.
    }
    // get max LSN from selected remote replicas
    long maxRemoteLSN = replicationManager.getMaxRemoteLSN(failbackRecoveryReplicas.keySet());
    // force LogManager to start from a log partition beyond the max LSN seen on the replicas
    logManager.renewLogFilesAndStartFromLSN(maxRemoteLSN);
    // start replication service after failback completed
    runtimeContext.getReplicationChannel().start();
    runtimeContext.getReplicationManager().startReplicationThreads();
    failbackRecoveryReplicas = null;
}
Usage example of org.apache.asterix.replication.storage.ReplicaResourcesManager in the Apache AsterixDB project: class NCAppRuntimeContext, method initialize.
/**
 * Initializes this node controller's runtime context: storage (buffer cache,
 * file map, local resource repository), the transaction subsystem, the dataset
 * lifecycle manager, the active manager, and — when this node participates in
 * replication — the replication manager/channel and remote recovery manager.
 * Finally registers all stoppable components with the lifecycle component
 * manager; registration order is significant because components are stopped
 * in reverse order of registration.
 *
 * @param initialRun true on the very first start of this node; together with
 *                   SystemState.PERMANENT_DATA_LOSS it triggers deletion of
 *                   any pre-existing storage data
 * @throws IOException   if creating the resource repository or storage layer fails
 * @throws ACIDException if the transaction subsystem cannot be initialized
 */
@Override
public void initialize(boolean initialRun) throws IOException, ACIDException {
ioManager = getServiceContext().getIoManager();
threadExecutor = new ThreadExecutor(getServiceContext().getThreadFactory());
fileMapManager = new FileMapManager(ioManager);
ICacheMemoryAllocator allocator = new HeapBufferAllocator();
// page cleaner delay is 600000 ms (10 minutes)
IPageCleanerPolicy pcp = new DelayPageCleanerPolicy(600000);
IPageReplacementStrategy prs = new ClockPageReplacementStrategy(allocator, storageProperties.getBufferCachePageSize(), storageProperties.getBufferCacheNumPages());
AsynchronousScheduler.INSTANCE.init(getServiceContext().getThreadFactory());
lsmIOScheduler = AsynchronousScheduler.INSTANCE;
metadataMergePolicyFactory = new PrefixMergePolicyFactory();
ILocalResourceRepositoryFactory persistentLocalResourceRepositoryFactory = new PersistentLocalResourceRepositoryFactory(ioManager, getServiceContext().getNodeId(), metadataProperties);
localResourceRepository = (PersistentLocalResourceRepository) persistentLocalResourceRepositoryFactory.createRepository();
IAppRuntimeContextProvider asterixAppRuntimeContextProvider = new AppRuntimeContextProviderForRecovery(this);
txnSubsystem = new TransactionSubsystem(getServiceContext(), getServiceContext().getNodeId(), asterixAppRuntimeContextProvider, txnProperties);
// consult the recovery manager to decide whether a clean (re)start is required
IRecoveryManager recoveryMgr = txnSubsystem.getRecoveryManager();
SystemState systemState = recoveryMgr.getSystemState();
if (initialRun || systemState == SystemState.PERMANENT_DATA_LOSS) {
//delete any storage data before the resource factory is initialized
localResourceRepository.deleteStorageData(true);
}
datasetLifecycleManager = new DatasetLifecycleManager(storageProperties, localResourceRepository, MetadataIndexImmutableProperties.FIRST_AVAILABLE_USER_DATASET_ID, txnSubsystem.getLogManager(), ioManager.getIODevices().size());
isShuttingdown = false;
activeManager = new ActiveManager(threadExecutor, getServiceContext().getNodeId(), activeProperties.getMemoryComponentGlobalBudget(), compilerProperties.getFrameSize());
// replication participant: wire the replication manager into the log manager
// and local resource repository, and track remote partitions as inactive
if (replicationProperties.isParticipant(getServiceContext().getNodeId())) {
String nodeId = getServiceContext().getNodeId();
replicaResourcesManager = new ReplicaResourcesManager(localResourceRepository, metadataProperties);
replicationManager = new ReplicationManager(nodeId, replicationProperties, replicaResourcesManager, txnSubsystem.getLogManager(), asterixAppRuntimeContextProvider);
//pass replication manager to replication required object
//LogManager to replicate logs
txnSubsystem.getLogManager().setReplicationManager(replicationManager);
//PersistentLocalResourceRepository to replicate metadata files and delete backups on drop index
localResourceRepository.setReplicationManager(replicationManager);
/*
 * add the partitions that will be replicated in this node as inactive partitions
 */
//get nodes which replicate to this node
Set<String> remotePrimaryReplicas = replicationProperties.getRemotePrimaryReplicasIds(nodeId);
for (String clientId : remotePrimaryReplicas) {
//get the partitions of each client
ClusterPartition[] clientPartitions = metadataProperties.getNodePartitions().get(clientId);
for (ClusterPartition partition : clientPartitions) {
localResourceRepository.addInactivePartition(partition.getPartitionId());
}
}
//initialize replication channel
replicationChannel = new ReplicationChannel(nodeId, replicationProperties, txnSubsystem.getLogManager(), replicaResourcesManager, replicationManager, getServiceContext(), asterixAppRuntimeContextProvider);
remoteRecoveryManager = new RemoteRecoveryManager(replicationManager, this, replicationProperties);
// replication-aware buffer cache variant (takes the replication manager)
bufferCache = new BufferCache(ioManager, prs, pcp, fileMapManager, storageProperties.getBufferCacheMaxOpenFiles(), getServiceContext().getThreadFactory(), replicationManager);
} else {
bufferCache = new BufferCache(ioManager, prs, pcp, fileMapManager, storageProperties.getBufferCacheMaxOpenFiles(), getServiceContext().getThreadFactory());
}
/*
 * The order of registration is important. The buffer cache must registered before recovery and transaction
 * managers. Notes: registered components are stopped in reversed order
 */
ILifeCycleComponentManager lccm = getServiceContext().getLifeCycleComponentManager();
lccm.register((ILifeCycleComponent) bufferCache);
/*
 * LogManager must be stopped after RecoveryManager, DatasetLifeCycleManager, and ReplicationManager
 * to process any logs that might be generated during stopping these components
 */
lccm.register((ILifeCycleComponent) txnSubsystem.getLogManager());
/*
 * ReplicationManager must be stopped after indexLifecycleManager and recovery manager
 * so that any logs/files generated during closing datasets or checkpoints are sent to remote replicas
 */
// null when this node is not a replication participant (see branch above)
if (replicationManager != null) {
lccm.register(replicationManager);
}
lccm.register((ILifeCycleComponent) txnSubsystem.getRecoveryManager());
/*
 * Stopping indexLifecycleManager will flush and close all datasets.
 */
lccm.register((ILifeCycleComponent) datasetLifecycleManager);
lccm.register((ILifeCycleComponent) txnSubsystem.getTransactionManager());
lccm.register((ILifeCycleComponent) txnSubsystem.getLockManager());
lccm.register(txnSubsystem.getCheckpointManager());
}
Aggregations