Search in sources:

Example 1 with PersistentLocalResourceRepository

use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in project asterixdb by apache.

In class ReplicationCheckpointManager, method getDeadReplicasMinFirstLSN:

/**
 * Collects the partitions owned by the given dead replicas that are not active on this
 * node and asks the replica resources manager for the minimum LSN across them.
 *
 * @param deadReplicaIds ids of the replicas whose partitions should be inspected
 * @return the value returned by {@code getPartitionsMinLSN} for the collected partition ids
 */
private long getDeadReplicasMinFirstLSN(Set<String> deadReplicaIds) {
    final IReplicaResourcesManager replicaResources = txnSubsystem.getAsterixAppRuntimeContextProvider().getAppContext().getReplicaResourcesManager();
    final IApplicationContext appContext = txnSubsystem.getAsterixAppRuntimeContextProvider().getAppContext();
    final MetadataProperties clusterMetadata = appContext.getMetadataProperties();
    final PersistentLocalResourceRepository localRepository = (PersistentLocalResourceRepository) txnSubsystem.getAsterixAppRuntimeContextProvider().getLocalResourceRepository();
    final Set<Integer> inactiveDeadPartitions = new HashSet<>();
    for (String replicaId : deadReplicaIds) {
        for (ClusterPartition candidate : clusterMetadata.getNodePartitions().get(replicaId)) {
            final Integer candidateId = candidate.getPartitionId();
            // Partitions that are active locally are served by this node; skip them.
            if (localRepository.getActivePartitions().contains(candidateId)) {
                continue;
            }
            inactiveDeadPartitions.add(candidateId);
        }
    }
    return replicaResources.getPartitionsMinLSN(inactiveDeadPartitions);
}
Also used : PersistentLocalResourceRepository(org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository) IReplicaResourcesManager(org.apache.asterix.common.replication.IReplicaResourcesManager) IApplicationContext(org.apache.asterix.common.api.IApplicationContext) MetadataProperties(org.apache.asterix.common.config.MetadataProperties) HashSet(java.util.HashSet) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 2 with PersistentLocalResourceRepository

use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in project asterixdb by apache.

In class RemoteRecoveryManager, method startFailbackProcess:

/**
 * Runs the failback (remote recovery) steps, retrying when a step fails with an
 * {@link IOException} until the configured number of attempts is used up.
 *
 * <p>Each attempt re-checks the remote replicas' state, closes all datasets, wipes and
 * re-initializes local storage, builds a recovery plan and then requests the replica
 * files for every lost replica in that plan.
 *
 * @throws IllegalStateException if no ACTIVE remote replica exists, or if every attempt
 *         failed; in the latter case the last {@link IOException} is attached as the cause
 */
@Override
public void startFailbackProcess() {
    int remainingAttempts = replicationProperties.getMaxRemoteRecoveryAttempts();
    PersistentLocalResourceRepository resourceRepository = (PersistentLocalResourceRepository) runtimeContext.getLocalResourceRepository();
    IDatasetLifecycleManager datasetLifeCycleManager = runtimeContext.getDatasetLifecycleManager();
    Map<String, ClusterPartition[]> nodePartitions = runtimeContext.getMetadataProperties().getNodePartitions();
    // Remembered so that the final failure reports why the last attempt did not succeed.
    IOException lastFailure = null;
    while (true) {
        if (remainingAttempts <= 0) {
            //to avoid infinite loop in case of unexpected behavior.
            throw new IllegalStateException("Failed to perform remote recovery.", lastFailure);
        }
        //start recovery steps
        try {
            /*** Prepare for Recovery ***/
            //1. check remote replicas states
            replicationManager.initializeReplicasState();
            int activeReplicasCount = replicationManager.getActiveReplicasCount();
            if (activeReplicasCount == 0) {
                throw new IllegalStateException("no ACTIVE remote replica(s) exists to perform remote recovery");
            }
            //2. clean any memory data that could've existed from previous failed recovery attempt
            datasetLifeCycleManager.closeAllDatasets();
            //3. remove any existing storage data and initialize storage metadata
            resourceRepository.deleteStorageData(true);
            resourceRepository.initializeNewUniverse(ClusterProperties.INSTANCE.getStorageDirectoryName());
            //4. select remote replicas to recover from per lost replica data
            failbackRecoveryReplicas = constructRemoteRecoveryPlan();
            /*** Start Recovery Per Lost Replica ***/
            for (Entry<String, Set<String>> remoteReplica : failbackRecoveryReplicas.entrySet()) {
                String replicaId = remoteReplica.getKey();
                Set<String> ncsToRecoverFor = remoteReplica.getValue();
                // Gather the ids of all partitions owned by the nodes recovered via this replica.
                Set<Integer> partitionsIds = new HashSet<>();
                for (String node : ncsToRecoverFor) {
                    for (ClusterPartition partition : nodePartitions.get(node)) {
                        partitionsIds.add(partition.getPartitionId());
                    }
                }
                //1. Request indexes metadata and LSM components
                replicationManager.requestReplicaFiles(replicaId, partitionsIds, new HashSet<String>());
            }
            break;
        } catch (IOException e) {
            if (LOGGER.isLoggable(Level.WARNING)) {
                LOGGER.log(Level.WARNING, "Failed during remote recovery. Attempting again...", e);
            }
            lastFailure = e;
            remainingAttempts--;
        }
    }
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) IOException(java.io.IOException) IDatasetLifecycleManager(org.apache.asterix.common.api.IDatasetLifecycleManager) PersistentLocalResourceRepository(org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository) HashSet(java.util.HashSet) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 3 with PersistentLocalResourceRepository

use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in project asterixdb by apache.

In class NCApplication, method start:

/**
 * Starts the node controller application: validates arguments, wires the thread factory,
 * builds and initializes the runtime context, installs the message broker and messaging
 * channel factory, records the system state reported by the recovery manager, and finally
 * performs local clean-up.
 *
 * @param serviceCtx the service context; cast to {@code INCServiceContext}
 * @param args application arguments; must be empty
 * @throws IllegalArgumentException if any argument is supplied
 * @throws Exception declared by the overridden method
 */
@Override
public void start(IServiceContext serviceCtx, String[] args) throws Exception {
    // This application does not accept any arguments.
    if (args.length > 0) {
        throw new IllegalArgumentException("Unrecognized argument(s): " + Arrays.toString(args));
    }
    ncServiceCtx = (INCServiceContext) serviceCtx;
    ncServiceCtx.setThreadFactory(new AsterixThreadFactory(ncServiceCtx.getThreadFactory(), ncServiceCtx.getLifeCycleComponentManager()));
    nodeId = ncServiceCtx.getNodeId();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Starting Asterix node controller: " + nodeId);
    }
    configureLoggingLevel(ncServiceCtx.getAppConfig().getLoggingLevel(ExternalProperties.Option.LOG_LEVEL));

    // Default the RMI hostname to the cluster public address unless it was set explicitly.
    final NodeControllerService controllerService = (NodeControllerService) ncServiceCtx.getControllerService();
    final String rmiHostnameProperty = "java.rmi.server.hostname";
    if (System.getProperty(rmiHostnameProperty) == null) {
        final String publicAddress = controllerService.getConfiguration().getClusterPublicAddress();
        System.setProperty(rmiHostnameProperty, publicAddress);
    }

    runtimeContext = new NCAppRuntimeContext(ncServiceCtx, getExtensions());
    MetadataProperties metadataProperties = runtimeContext.getMetadataProperties();
    // A node id that is not among the known node names is handled as a substitute node.
    final boolean knownNode = metadataProperties.getNodeNames().contains(ncServiceCtx.getNodeId());
    if (!knownNode) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Substitute node joining : " + ncServiceCtx.getNodeId());
        }
        updateOnNodeJoin();
    }
    final boolean initialRun = runtimeContext.getNodeProperties().isInitialRun();
    runtimeContext.initialize(initialRun);

    // Install the message broker and the messaging channel factory built on top of it.
    MessagingProperties messagingProperties = runtimeContext.getMessagingProperties();
    final NCMessageBroker broker = new NCMessageBroker(controllerService, messagingProperties);
    ncServiceCtx.setMessageBroker(broker);
    ncServiceCtx.setMessagingChannelInterfaceFactory(new MessagingChannelInterfaceFactory(broker, messagingProperties));

    IRecoveryManager recoveryManager = runtimeContext.getTransactionSubsystem().getRecoveryManager();
    systemState = recoveryManager.getSystemState();
    if (systemState == SystemState.PERMANENT_DATA_LOSS) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("System state: " + SystemState.PERMANENT_DATA_LOSS);
            LOGGER.info("Node ID: " + nodeId);
            LOGGER.info("Stores: " + PrintUtil.toString(metadataProperties.getStores()));
            LOGGER.info("Root Metadata Store: " + metadataProperties.getStores().get(nodeId)[0]);
        }
        // Re-create the storage metadata under the configured storage directory.
        final PersistentLocalResourceRepository localRepository = (PersistentLocalResourceRepository) runtimeContext.getLocalResourceRepository();
        localRepository.initializeNewUniverse(ClusterProperties.INSTANCE.getStorageDirectoryName());
    }
    webManager = new WebManager();
    performLocalCleanUp();
}
Also used : NodeControllerService(org.apache.hyracks.control.nc.NodeControllerService) PersistentLocalResourceRepository(org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository) MessagingChannelInterfaceFactory(org.apache.asterix.messaging.MessagingChannelInterfaceFactory) WebManager(org.apache.hyracks.http.server.WebManager) NCAppRuntimeContext(org.apache.asterix.app.nc.NCAppRuntimeContext) IMessageBroker(org.apache.hyracks.api.messages.IMessageBroker) MetadataProperties(org.apache.asterix.common.config.MetadataProperties) NCMessageBroker(org.apache.asterix.messaging.NCMessageBroker) AsterixThreadFactory(org.apache.asterix.common.api.AsterixThreadFactory) MessagingProperties(org.apache.asterix.common.config.MessagingProperties) IRecoveryManager(org.apache.asterix.common.transactions.IRecoveryManager)

Example 4 with PersistentLocalResourceRepository

use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in project asterixdb by apache.

In class PreparePartitionsFailbackRequestMessage, method handle:

/**
 * Prepares this node's partitions for failback: closes/flushes datasets, marks the
 * requested partitions as inactive in the local resource repository and then sends a
 * {@code PreparePartitionsFailbackResponseMessage} to the cluster controller.
 *
 * @param appContext the node controller application context
 * @throws HyracksDataException if unexporting the metadata node stub or sending the
 *         response fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void handle(INcApplicationContext appContext) throws HyracksDataException, InterruptedException {
    INCMessageBroker broker = (INCMessageBroker) appContext.getServiceContext().getMessageBroker();
    // If the metadata partition will be failed back, all datasets (metadata included) are
    // flushed and closed. Otherwise only the non-metadata datasets are closed and the
    // metadata datasets are flushed, so that their memory components will be copied to the
    // failing back node.
    if (!releaseMetadataNode) {
        //close all non-metadata datasets
        appContext.getDatasetLifecycleManager().closeUserDatasets();
        //flush the remaining metadata datasets that were not closed
        appContext.getDatasetLifecycleManager().flushAllDatasets();
    } else {
        appContext.getDatasetLifecycleManager().closeAllDatasets();
        //remove the metadata node stub from RMI registry
        try {
            appContext.unexportMetadataNodeStub();
        } catch (RemoteException e) {
            LOGGER.log(Level.SEVERE, "Failed unexporting metadata stub", e);
            throw HyracksDataException.create(e);
        }
    }
    //mark the partitions to be closed as inactive
    final PersistentLocalResourceRepository localRepository = (PersistentLocalResourceRepository) appContext.getLocalResourceRepository();
    for (Integer inactivePartition : partitions) {
        localRepository.addInactivePartition(inactivePartition);
    }
    //send response after partitions prepared for failback
    final PreparePartitionsFailbackResponseMessage response = new PreparePartitionsFailbackResponseMessage(planId, requestId, partitions);
    try {
        broker.sendMessageToCC(response);
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Failed sending message to cc", e);
        throw HyracksDataException.create(e);
    }
}
Also used : PersistentLocalResourceRepository(org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository) INCMessageBroker(org.apache.asterix.common.messaging.api.INCMessageBroker) RemoteException(java.rmi.RemoteException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) RemoteException(java.rmi.RemoteException)

Example 5 with PersistentLocalResourceRepository

use of org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository in project asterixdb by apache.

In class NCApplication, method performLocalCleanUp:

/**
 * Cleans up local leftovers: deletes the IO manager's workspace files and removes the
 * temporary-datasets folder under the storage directory of every storage mounting point.
 */
private void performLocalCleanUp() {
    // Delete working area files from failed jobs.
    runtimeContext.getIoManager().deleteWorkspaceFiles();

    // Reclaim storage for temporary datasets.
    // The part of the path after the mounting point is the same for every device.
    final String tempDatasetsSuffix = ClusterProperties.INSTANCE.getStorageDirectoryName() + File.separator + StoragePathUtil.TEMP_DATASETS_STORAGE_FOLDER;
    final PersistentLocalResourceRepository localRepository = (PersistentLocalResourceRepository) runtimeContext.getLocalResourceRepository();
    final String[] mountingPoints = localRepository.getStorageMountingPoints();
    for (int i = 0; i < mountingPoints.length; i++) {
        // deleteQuietly is used, as in the original clean-up, so a failed delete does not abort it.
        FileUtils.deleteQuietly(new File(mountingPoints[i] + tempDatasetsSuffix));
    }

    // TODO
    // Reclaim storage for orphaned index artifacts in NCs.
    // Note: currently LSM indexes invalid components are deleted when an index is activated.
}
Also used : PersistentLocalResourceRepository(org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository) File(java.io.File)

Aggregations

PersistentLocalResourceRepository (org.apache.asterix.transaction.management.resource.PersistentLocalResourceRepository)7 HashSet (java.util.HashSet)4 IOException (java.io.IOException)2 Set (java.util.Set)2 IDatasetLifecycleManager (org.apache.asterix.common.api.IDatasetLifecycleManager)2 ClusterPartition (org.apache.asterix.common.cluster.ClusterPartition)2 MetadataProperties (org.apache.asterix.common.config.MetadataProperties)2 File (java.io.File)1 RemoteException (java.rmi.RemoteException)1 NCAppRuntimeContext (org.apache.asterix.app.nc.NCAppRuntimeContext)1 AsterixThreadFactory (org.apache.asterix.common.api.AsterixThreadFactory)1 IApplicationContext (org.apache.asterix.common.api.IApplicationContext)1 MessagingProperties (org.apache.asterix.common.config.MessagingProperties)1 INCMessageBroker (org.apache.asterix.common.messaging.api.INCMessageBroker)1 IReplicaResourcesManager (org.apache.asterix.common.replication.IReplicaResourcesManager)1 ILogManager (org.apache.asterix.common.transactions.ILogManager)1 IRecoveryManager (org.apache.asterix.common.transactions.IRecoveryManager)1 MessagingChannelInterfaceFactory (org.apache.asterix.messaging.MessagingChannelInterfaceFactory)1 NCMessageBroker (org.apache.asterix.messaging.NCMessageBroker)1 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)1