Use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in the Apache AsterixDB project.
Class ReplicationCheckpointManager, method getDeadReplicasMinFirstLSN.
/**
 * Collects the partitions that belong to the given dead replicas and are not
 * currently listed as active in the local resource repository, then asks the
 * replica resources manager for the minimum LSN over those partitions.
 *
 * @param deadReplicaIds ids of the replicas considered dead
 * @return the result of {@code getPartitionsMinLSN} for the collected partition ids
 */
private long getDeadReplicasMinFirstLSN(Set<String> deadReplicaIds) {
    final IReplicaResourcesManager replicaResources =
            txnSubsystem.getAsterixAppRuntimeContextProvider().getAppContext().getReplicaResourcesManager();
    final IApplicationContext appCtx = txnSubsystem.getAsterixAppRuntimeContextProvider().getAppContext();
    final MetadataProperties metadataProps = appCtx.getMetadataProperties();
    final PersistentLocalResourceRepository localRepo = (PersistentLocalResourceRepository) txnSubsystem
            .getAsterixAppRuntimeContextProvider().getLocalResourceRepository();

    // Keep only the dead replicas' partitions that this node is not already serving.
    final Set<Integer> inactiveDeadPartitions = new HashSet<>();
    for (String replicaId : deadReplicaIds) {
        for (ClusterPartition candidate : metadataProps.getNodePartitions().get(replicaId)) {
            if (localRepo.getActivePartitions().contains(candidate.getPartitionId())) {
                continue;
            }
            inactiveDeadPartitions.add(candidate.getPartitionId());
        }
    }
    return replicaResources.getPartitionsMinLSN(inactiveDeadPartitions);
}
Use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in the Apache AsterixDB project.
Class RemoteRecoveryManager, method startFailbackProcess.
/**
 * Runs the failback recovery steps, retrying after an {@link IOException} until
 * they complete or the configured number of remote recovery attempts is used up.
 *
 * @throws IllegalStateException if no attempts remain, or if no ACTIVE remote
 *         replica is available to recover from
 */
@Override
public void startFailbackProcess() {
    int attemptsLeft = replicationProperties.getMaxRemoteRecoveryAttempts();
    final PersistentLocalResourceRepository localRepo =
            (PersistentLocalResourceRepository) runtimeContext.getLocalResourceRepository();
    final IDatasetLifecycleManager lifecycleManager = runtimeContext.getDatasetLifecycleManager();
    final Map<String, ClusterPartition[]> partitionsByNode =
            runtimeContext.getMetadataProperties().getNodePartitions();

    boolean recovered = false;
    while (!recovered) {
        // Guard against an infinite loop in case of unexpected behavior.
        if (attemptsLeft <= 0) {
            throw new IllegalStateException("Failed to perform remote recovery.");
        }
        try {
            // --- Prepare for recovery ---
            // 1. Check remote replicas states.
            replicationManager.initializeReplicasState();
            if (replicationManager.getActiveReplicasCount() == 0) {
                throw new IllegalStateException("no ACTIVE remote replica(s) exists to perform remote recovery");
            }
            // 2. Clean any in-memory data left over from a previous failed recovery attempt.
            lifecycleManager.closeAllDatasets();
            // 3. Remove any existing storage data and initialize storage metadata.
            localRepo.deleteStorageData(true);
            localRepo.initializeNewUniverse(ClusterProperties.INSTANCE.getStorageDirectoryName());
            // 4. Select the remote replicas to recover from, per lost replica data.
            failbackRecoveryReplicas = constructRemoteRecoveryPlan();

            // --- Start recovery per lost replica ---
            for (Entry<String, Set<String>> planEntry : failbackRecoveryReplicas.entrySet()) {
                final Set<Integer> partitionsToFetch = new HashSet<>();
                for (String lostNode : planEntry.getValue()) {
                    partitionsToFetch.addAll(Arrays.stream(partitionsByNode.get(lostNode))
                            .map(ClusterPartition::getPartitionId).collect(Collectors.toSet()));
                }
                // Request indexes metadata and LSM components from the selected replica.
                replicationManager.requestReplicaFiles(planEntry.getKey(), partitionsToFetch,
                        new HashSet<String>());
            }
            recovered = true;
        } catch (IOException e) {
            if (LOGGER.isLoggable(Level.WARNING)) {
                LOGGER.log(Level.WARNING, "Failed during remote recovery. Attempting again...", e);
            }
            attemptsLeft--;
        }
    }
}
Use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in the Apache AsterixDB project.
Class NCApplication, method start.
/**
 * Starts the node controller application: installs the thread factory, builds and
 * initializes the runtime context, wires up messaging, reads the system state from
 * the recovery manager and finally performs local clean-up.
 *
 * @param serviceCtx the service context; cast to {@code INCServiceContext}
 * @param args application arguments; must be empty
 * @throws IllegalArgumentException if any argument is supplied
 * @throws Exception if any of the start-up steps fails
 */
@Override
public void start(IServiceContext serviceCtx, String[] args) throws Exception {
    if (args.length > 0) {
        throw new IllegalArgumentException("Unrecognized argument(s): " + Arrays.toString(args));
    }

    ncServiceCtx = (INCServiceContext) serviceCtx;
    ncServiceCtx.setThreadFactory(new AsterixThreadFactory(ncServiceCtx.getThreadFactory(),
            ncServiceCtx.getLifeCycleComponentManager()));
    nodeId = ncServiceCtx.getNodeId();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Starting Asterix node controller: " + nodeId);
    }
    configureLoggingLevel(ncServiceCtx.getAppConfig().getLoggingLevel(ExternalProperties.Option.LOG_LEVEL));

    final NodeControllerService ncs = (NodeControllerService) ncServiceCtx.getControllerService();
    // Only set the RMI hostname when it has not been configured explicitly.
    if (System.getProperty("java.rmi.server.hostname") == null) {
        System.setProperty("java.rmi.server.hostname", ncs.getConfiguration().getClusterPublicAddress());
    }

    runtimeContext = new NCAppRuntimeContext(ncServiceCtx, getExtensions());
    final MetadataProperties metadataProps = runtimeContext.getMetadataProperties();
    // A node id that is not among the configured node names is handled as a substitute node.
    if (!metadataProps.getNodeNames().contains(ncServiceCtx.getNodeId())) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Substitute node joining : " + ncServiceCtx.getNodeId());
        }
        updateOnNodeJoin();
    }
    runtimeContext.initialize(runtimeContext.getNodeProperties().isInitialRun());

    final MessagingProperties messagingProps = runtimeContext.getMessagingProperties();
    final IMessageBroker broker = new NCMessageBroker(ncs, messagingProps);
    ncServiceCtx.setMessageBroker(broker);
    final MessagingChannelInterfaceFactory channelFactory =
            new MessagingChannelInterfaceFactory((NCMessageBroker) broker, messagingProps);
    ncServiceCtx.setMessagingChannelInterfaceFactory(channelFactory);

    final IRecoveryManager recoveryManager = runtimeContext.getTransactionSubsystem().getRecoveryManager();
    systemState = recoveryManager.getSystemState();
    if (systemState == SystemState.PERMANENT_DATA_LOSS) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("System state: " + SystemState.PERMANENT_DATA_LOSS);
            LOGGER.info("Node ID: " + nodeId);
            LOGGER.info("Stores: " + PrintUtil.toString(metadataProps.getStores()));
            LOGGER.info("Root Metadata Store: " + metadataProps.getStores().get(nodeId)[0]);
        }
        // Re-create the storage metadata in the configured storage directory.
        final PersistentLocalResourceRepository localRepo =
                (PersistentLocalResourceRepository) runtimeContext.getLocalResourceRepository();
        localRepo.initializeNewUniverse(ClusterProperties.INSTANCE.getStorageDirectoryName());
    }

    webManager = new WebManager();
    performLocalCleanUp();
}
Use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in the Apache AsterixDB project.
Class PreparePartitionsFailbackRequestMessage, method handle.
/**
 * Prepares this node's partitions for failback: closes and/or flushes datasets,
 * marks the requested partitions as inactive in the local resource repository and
 * sends a {@code PreparePartitionsFailbackResponseMessage} to the CC.
 *
 * @param appContext the NC application context the message is handled in
 * @throws HyracksDataException if unexporting the metadata node stub fails or the
 *         response cannot be sent to the CC
 * @throws InterruptedException declared by the signature; may propagate from the
 *         calls made here
 */
@Override
public void handle(INcApplicationContext appContext) throws HyracksDataException, InterruptedException {
    INCMessageBroker messageBroker = (INCMessageBroker) appContext.getServiceContext().getMessageBroker();

    /*
     * If the metadata partition will be failed back, all datasets (including the
     * metadata datasets) must be flushed and closed. Otherwise only the non-metadata
     * datasets are closed and the metadata datasets are flushed, so that their memory
     * components will be copied to the failing back node.
     */
    if (!releaseMetadataNode) {
        // Close all non-metadata datasets.
        appContext.getDatasetLifecycleManager().closeUserDatasets();
        // Flush the remaining metadata datasets that were not closed.
        appContext.getDatasetLifecycleManager().flushAllDatasets();
    } else {
        appContext.getDatasetLifecycleManager().closeAllDatasets();
        // Remove the metadata node stub from the RMI registry.
        try {
            appContext.unexportMetadataNodeStub();
        } catch (RemoteException e) {
            LOGGER.log(Level.SEVERE, "Failed unexporting metadata stub", e);
            throw HyracksDataException.create(e);
        }
    }

    // Mark the partitions to be closed as inactive.
    PersistentLocalResourceRepository localRepo =
            (PersistentLocalResourceRepository) appContext.getLocalResourceRepository();
    for (Integer inactivePartition : partitions) {
        localRepo.addInactivePartition(inactivePartition);
    }

    // Send the response once the partitions are prepared for failback.
    PreparePartitionsFailbackResponseMessage response =
            new PreparePartitionsFailbackResponseMessage(planId, requestId, partitions);
    try {
        messageBroker.sendMessageToCC(response);
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Failed sending message to cc", e);
        throw HyracksDataException.create(e);
    }
}
Use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in the Apache AsterixDB project.
Class NCApplication, method performLocalCleanUp.
/**
 * Cleans up local leftovers: deletes workspace files through the IO manager and
 * removes the temporary-datasets folder under each storage mounting point.
 */
private void performLocalCleanUp() {
    // Delete working area files from failed jobs.
    runtimeContext.getIoManager().deleteWorkspaceFiles();

    // Reclaim storage for temporary datasets. The path below each mounting point is
    // the same for every device, so it is built once.
    final String tempDatasetsSuffix = ClusterProperties.INSTANCE.getStorageDirectoryName() + File.separator
            + StoragePathUtil.TEMP_DATASETS_STORAGE_FOLDER;
    final PersistentLocalResourceRepository localRepo =
            (PersistentLocalResourceRepository) runtimeContext.getLocalResourceRepository();
    for (String mountPoint : localRepo.getStorageMountingPoints()) {
        // deleteQuietly does not throw if the directory is missing or cannot be removed.
        FileUtils.deleteQuietly(new File(mountPoint + tempDatasetsSuffix));
    }

    //TODO
    //Reclaim storage for orphaned index artifacts in NCs.
    //Note: currently LSM indexes invalid components are deleted when an index is activated.
}
Aggregations