Search in sources :

Example 16 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class RetentionLeaseIT method testRetentionLeasesSyncOnRecovery.

/**
 * Checks that retention leases added on the primary reach the replicas through peer recovery.
 * <p>
 * The index is created with zero replicas, leases are added on the primary, and only then are replicas
 * allowed to allocate. A send behavior on the primary's transport service disrupts a bounded number of
 * recovery FINALIZE requests so that recovery retries are exercised as well. At the end every replica must
 * report the same leases as the primary, both from {@code getRetentionLeases()} and from
 * {@code loadRetentionLeases()}.
 *
 * @throws Exception if any step of the test fails
 */
public void testRetentionLeasesSyncOnRecovery() throws Exception {
    final int numberOfReplicas = 2 - scaledRandomIntBetween(0, 2);
    internalCluster().ensureAtLeastNumDataNodes(1 + numberOfReplicas);
    /*
     * We effectively disable the background sync to ensure that the retention leases are not synced in the background so that the only
     * source of retention leases on the replicas would be from recovery.
     */
    final Settings.Builder settings = Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), TimeValue.timeValueHours(24));
    // when we increase the number of replicas below we want to exclude the replicas from being allocated so that they do not recover
    // NOTE(review): presumably the second argument (1) is what restricts allocation to a single node - confirm against prepareCreate
    assertAcked(prepareCreate("index", 1, settings));
    ensureYellow("index");
    final AcknowledgedResponse response = client().admin().indices().prepareUpdateSettings("index").setSettings(Settings.builder().put("index.number_of_replicas", numberOfReplicas).build()).get();
    assertTrue(response.isAcknowledged());
    // locate the IndexShard instance of the primary via the routing table
    final String primaryShardNodeId = clusterService().state().routingTable().index("index").shard(0).primaryShard().currentNodeId();
    final String primaryShardNodeName = clusterService().state().nodes().get(primaryShardNodeId).getName();
    final IndexShard primary = internalCluster().getInstance(IndicesService.class, primaryShardNodeName).getShardOrNull(new ShardId(resolveIndex("index"), 0));
    final int length = randomIntBetween(1, 8);
    // insertion-ordered map of lease id -> lease as returned by the primary; used as the expected state below
    final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
    logger.info("adding retention [{}] leases", length);
    for (int i = 0; i < length; i++) {
        // lease ids must be unique, so re-draw until we get one that is not already in use
        final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
        final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
        final String source = randomAlphaOfLength(8);
        final CountDownLatch latch = new CountDownLatch(1);
        final ActionListener<ReplicationResponse> listener = countDownLatchListener(latch);
        currentRetentionLeases.put(id, primary.addRetentionLease(id, retainingSequenceNumber, source, listener));
        // block until the listener for this lease has been notified before adding the next one
        latch.await();
    }
    logger.info("finished adding [{}] retention leases", length);
    // cause some recoveries to fail to ensure that retention leases are handled properly when retrying a recovery
    assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder().put(INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), TimeValue.timeValueMillis(100))));
    // each permit allows one FINALIZE request to be disrupted; zero permits means no disruption at all
    final Semaphore recoveriesToDisrupt = new Semaphore(scaledRandomIntBetween(0, 4));
    final MockTransportService primaryTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, primaryShardNodeName);
    primaryTransportService.addSendBehavior((connection, requestId, action, request, options) -> {
        if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE) && recoveriesToDisrupt.tryAcquire()) {
            if (randomBoolean()) {
                // return a ConnectTransportException to the START_RECOVERY action
                // (done by dropping and re-establishing the replica's connection to the primary; the request is still sent below)
                final TransportService replicaTransportService = internalCluster().getInstance(TransportService.class, connection.getNode().getName());
                final DiscoveryNode primaryNode = primaryTransportService.getLocalNode();
                replicaTransportService.disconnectFromNode(primaryNode);
                replicaTransportService.connectToNode(primaryNode);
            } else {
                // return an exception to the FINALIZE action
                throw new OpenSearchException("failing recovery for test purposes");
            }
        }
        connection.sendRequest(requestId, action, request, options);
    });
    logger.info("allow [{}] replicas to allocate", numberOfReplicas);
    // now allow the replicas to be allocated and wait for recovery to finalize
    allowNodes("index", 1 + numberOfReplicas);
    ensureGreen("index");
    // check current retention leases have been synced to all replicas
    for (final ShardRouting replicaShard : clusterService().state().routingTable().index("index").shard(0).replicaShards()) {
        final String replicaShardNodeId = replicaShard.currentNodeId();
        final String replicaShardNodeName = clusterService().state().nodes().get(replicaShardNodeId).getName();
        final IndexShard replica = internalCluster().getInstance(IndicesService.class, replicaShardNodeName).getShardOrNull(new ShardId(resolveIndex("index"), 0));
        final Map<String, RetentionLease> retentionLeasesOnReplica = RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(replica.getRetentionLeases());
        assertThat(retentionLeasesOnReplica, equalTo(currentRetentionLeases));
        // check retention leases have been written on the replica; see RecoveryTarget#finalizeRecovery
        assertThat(currentRetentionLeases, equalTo(RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(replica.loadRetentionLeases())));
    }
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) MockTransportService(org.opensearch.test.transport.MockTransportService) IndexShard(org.opensearch.index.shard.IndexShard) AcknowledgedResponse(org.opensearch.action.support.master.AcknowledgedResponse) IndicesService(org.opensearch.indices.IndicesService) Semaphore(java.util.concurrent.Semaphore) CountDownLatch(java.util.concurrent.CountDownLatch) LinkedHashMap(java.util.LinkedHashMap) ReplicationResponse(org.opensearch.action.support.replication.ReplicationResponse) ShardId(org.opensearch.index.shard.ShardId) MockTransportService(org.opensearch.test.transport.MockTransportService) TransportService(org.opensearch.transport.TransportService) OpenSearchException(org.opensearch.OpenSearchException) ShardRouting(org.opensearch.cluster.routing.ShardRouting) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings)

Example 17 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class ShardIndexingPressureSettingsIT method getPrimaryReplicaNodeNames.

/**
 * Looks up the names of one node holding a primary shard copy and one node holding a replica shard copy
 * of the given index, based on the index's shard stats.
 *
 * @param indexName name of the index whose shard stats are queried
 * @return a tuple whose first element is the primary's node name and whose second is the replica's node name
 * @throws java.util.NoSuchElementException if the stats contain no primary routing or no replica routing
 */
private Tuple<String, String> getPrimaryReplicaNodeNames(String indexName) {
    final ShardStats[] allShardStats = client().admin().indices().prepareStats(indexName).get().getShards();

    // any primary copy will do
    final ShardRouting primaryRouting = Stream.of(allShardStats)
        .map(shardStats -> shardStats.getShardRouting())
        .filter(routing -> routing.primary())
        .findAny()
        .get();
    final String primaryNodeId = primaryRouting.currentNodeId();

    // any non-primary copy will do
    final ShardRouting replicaRouting = Stream.of(allShardStats)
        .map(shardStats -> shardStats.getShardRouting())
        .filter(routing -> routing.primary() == false)
        .findAny()
        .get();
    final String replicaNodeId = replicaRouting.currentNodeId();

    // translate node ids into node names using the current cluster state
    final DiscoveryNodes clusterNodes = client().admin().cluster().prepareState().get().getState().nodes();
    return new Tuple<>(clusterNodes.get(primaryNodeId).getName(), clusterNodes.get(replicaNodeId).getName());
}
Also used : ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) Arrays(java.util.Arrays) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) ThreadPool(org.opensearch.threadpool.ThreadPool) BulkRequest(org.opensearch.action.bulk.BulkRequest) ClusterUpdateSettingsRequest(org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest) Releasable(org.opensearch.common.lease.Releasable) MockTransportService(org.opensearch.test.transport.MockTransportService) InternalTestCluster(org.opensearch.test.InternalTestCluster) TransportShardBulkAction(org.opensearch.action.bulk.TransportShardBulkAction) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) InternalSettingsPlugin(org.opensearch.test.InternalSettingsPlugin) UUIDs(org.opensearch.common.UUIDs) BulkItemRequest(org.opensearch.action.bulk.BulkItemRequest) OpenSearchAssertions.assertAcked(org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked) BulkShardRequest(org.opensearch.action.bulk.BulkShardRequest) Collection(java.util.Collection) IndicesService(org.opensearch.indices.IndicesService) Settings(org.opensearch.common.settings.Settings) TransportService(org.opensearch.transport.TransportService) Plugin(org.opensearch.plugins.Plugin) ActionFuture(org.opensearch.action.ActionFuture) Tuple(org.opensearch.common.collect.Tuple) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) RamUsageEstimator(org.apache.lucene.util.RamUsageEstimator) CountDownLatch(java.util.concurrent.CountDownLatch) Stream(java.util.stream.Stream) BulkResponse(org.opensearch.action.bulk.BulkResponse) IndicesStatsResponse(org.opensearch.action.admin.indices.stats.IndicesStatsResponse) ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) 
IndexRequest(org.opensearch.action.index.IndexRequest) OpenSearchIntegTestCase(org.opensearch.test.OpenSearchIntegTestCase) Collections(java.util.Collections) IndicesStatsResponse(org.opensearch.action.admin.indices.stats.IndicesStatsResponse) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) Tuple(org.opensearch.common.collect.Tuple)

Example 18 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class SearchStatsIT method nodeIdsWithIndex.

/**
 * Collects the ids of all nodes that currently hold at least one active shard of the given indices.
 *
 * @param indices names of the indices to inspect
 * @return the set of node ids hosting an active shard copy; empty if there is none
 */
private Set<String> nodeIdsWithIndex(String... indices) {
    final ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
    final Set<String> nodeIds = new HashSet<>();
    for (ShardIterator shardGroup : clusterState.routingTable().allAssignedShardsGrouped(indices, true)) {
        for (ShardRouting shard : shardGroup) {
            // only shards reported as active contribute their node
            if (shard.active() == false) {
                continue;
            }
            nodeIds.add(shard.currentNodeId());
        }
    }
    return nodeIds;
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) ShardIterator(org.opensearch.cluster.routing.ShardIterator) ShardRouting(org.opensearch.cluster.routing.ShardRouting) HashSet(java.util.HashSet)

Example 19 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class InternalClusterInfoService method buildShardLevelInfo.

/**
 * Populates the supplied builders with per-shard information extracted from the given shard stats.
 * <p>
 * For every entry the routing-to-data-path mapping is recorded. If store stats are available, the shard size
 * is recorded under the shard's identifier, and - unless the reserved size equals
 * {@link StoreStats#UNKNOWN_RESERVED_BYTES} - the reserved bytes are added to the builder kept for the
 * shard's node and data path.
 *
 * @param logger                     logger used for trace output
 * @param stats                      shard stats to process
 * @param shardSizes                 receives shard identifier to size in bytes
 * @param newShardRoutingToDataPath  receives shard routing to data path
 * @param reservedSpaceByShard       receives reserved-space builders keyed by node and path
 */
static void buildShardLevelInfo(Logger logger, ShardStats[] stats, ImmutableOpenMap.Builder<String, Long> shardSizes, ImmutableOpenMap.Builder<ShardRouting, String> newShardRoutingToDataPath, Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> reservedSpaceByShard) {
    for (final ShardStats shardStats : stats) {
        final ShardRouting routing = shardStats.getShardRouting();
        // the data path is recorded even when no store stats are available
        newShardRoutingToDataPath.put(routing, shardStats.getDataPath());

        final StoreStats store = shardStats.getStats().getStore();
        if (store != null) {
            final long sizeInBytes = store.sizeInBytes();
            final long reservedBytes = store.getReservedSize().getBytes();
            final String shardKey = ClusterInfo.shardIdentifierFromRouting(routing);
            logger.trace("shard: {} size: {} reserved: {}", shardKey, sizeInBytes, reservedBytes);
            shardSizes.put(shardKey, sizeInBytes);

            // nothing to account for when the reserved size is unknown
            if (reservedBytes == StoreStats.UNKNOWN_RESERVED_BYTES) {
                continue;
            }
            final ClusterInfo.NodeAndPath nodeAndPath = new ClusterInfo.NodeAndPath(routing.currentNodeId(), shardStats.getDataPath());
            reservedSpaceByShard.computeIfAbsent(nodeAndPath, ignored -> new ClusterInfo.ReservedSpace.Builder()).add(routing.shardId(), reservedBytes);
        }
    }
}
Also used : ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) StoreStats(org.opensearch.index.store.StoreStats) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 20 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class InternalClusterInfoService method refresh.

/**
 * Refreshes the ClusterInfo in a blocking fashion.
 * <p>
 * Starts a node-stats update and an indices-stats update, each with a listener that stores its result in
 * fields of this object, then waits on the latch returned by each update for up to {@code fetchTimeout}.
 * Afterwards the current {@link ClusterInfo} is obtained via {@code getClusterInfo()} and handed to every
 * registered listener; an exception thrown by one listener is logged and does not prevent the remaining
 * listeners from being notified.
 *
 * @return the cluster info obtained after waiting for both updates
 */
public final ClusterInfo refresh() {
    logger.trace("refreshing cluster info");
    // kick off the node stats update; the returned latch is awaited further below
    final CountDownLatch nodeLatch = updateNodeStats(new ActionListener<NodesStatsResponse>() {

        @Override
        public void onResponse(NodesStatsResponse nodesStatsResponse) {
            // rebuild both disk usage maps from the response and publish them
            ImmutableOpenMap.Builder<String, DiskUsage> leastAvailableUsagesBuilder = ImmutableOpenMap.builder();
            ImmutableOpenMap.Builder<String, DiskUsage> mostAvailableUsagesBuilder = ImmutableOpenMap.builder();
            fillDiskUsagePerNode(logger, adjustNodesStats(nodesStatsResponse.getNodes()), leastAvailableUsagesBuilder, mostAvailableUsagesBuilder);
            leastAvailableSpaceUsages = leastAvailableUsagesBuilder.build();
            mostAvailableSpaceUsages = mostAvailableUsagesBuilder.build();
        }

        @Override
        public void onFailure(Exception e) {
            if (e instanceof ReceiveTimeoutTransportException) {
                // on a timeout the previously stored usages are left untouched
                logger.error("NodeStatsAction timed out for ClusterInfoUpdateJob", e);
            } else {
                if (e instanceof ClusterBlockException) {
                    // cluster block failures are only reported at trace level
                    if (logger.isTraceEnabled()) {
                        logger.trace("Failed to execute NodeStatsAction for ClusterInfoUpdateJob", e);
                    }
                } else {
                    logger.warn("Failed to execute NodeStatsAction for ClusterInfoUpdateJob", e);
                }
                // we empty the usages list, to be safe - we don't know what's going on.
                leastAvailableSpaceUsages = ImmutableOpenMap.of();
                mostAvailableSpaceUsages = ImmutableOpenMap.of();
            }
        }
    });
    // kick off the indices stats update; the returned latch is awaited further below
    final CountDownLatch indicesLatch = updateIndicesStats(new ActionListener<IndicesStatsResponse>() {

        @Override
        public void onResponse(IndicesStatsResponse indicesStatsResponse) {
            final ShardStats[] stats = indicesStatsResponse.getShards();
            final ImmutableOpenMap.Builder<String, Long> shardSizeByIdentifierBuilder = ImmutableOpenMap.builder();
            final ImmutableOpenMap.Builder<ShardRouting, String> dataPathByShardRoutingBuilder = ImmutableOpenMap.builder();
            final Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> reservedSpaceBuilders = new HashMap<>();
            buildShardLevelInfo(logger, stats, shardSizeByIdentifierBuilder, dataPathByShardRoutingBuilder, reservedSpaceBuilders);
            // turn the per-node-and-path builders into built ReservedSpace values
            final ImmutableOpenMap.Builder<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> rsrvdSpace = ImmutableOpenMap.builder();
            reservedSpaceBuilders.forEach((nodeAndPath, builder) -> rsrvdSpace.put(nodeAndPath, builder.build()));
            // publish sizes, data paths and reserved space together as a single summary object
            indicesStatsSummary = new IndicesStatsSummary(shardSizeByIdentifierBuilder.build(), dataPathByShardRoutingBuilder.build(), rsrvdSpace.build());
        }

        @Override
        public void onFailure(Exception e) {
            if (e instanceof ReceiveTimeoutTransportException) {
                // on a timeout the previously stored summary is left untouched
                logger.error("IndicesStatsAction timed out for ClusterInfoUpdateJob", e);
            } else {
                if (e instanceof ClusterBlockException) {
                    // cluster block failures are only reported at trace level
                    if (logger.isTraceEnabled()) {
                        logger.trace("Failed to execute IndicesStatsAction for ClusterInfoUpdateJob", e);
                    }
                } else {
                    logger.warn("Failed to execute IndicesStatsAction for ClusterInfoUpdateJob", e);
                }
                // we empty the usages list, to be safe - we don't know what's going on.
                indicesStatsSummary = IndicesStatsSummary.EMPTY;
            }
        }
    });
    // wait (bounded by fetchTimeout) for the node stats update; a timeout is logged and execution continues
    try {
        if (nodeLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS) == false) {
            logger.warn("Failed to update node information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
        }
    } catch (InterruptedException e) {
        // restore interrupt status
        Thread.currentThread().interrupt();
    }
    // wait (bounded by fetchTimeout) for the indices stats update; a timeout is logged and execution continues
    try {
        if (indicesLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS) == false) {
            logger.warn("Failed to update shard information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
        }
    } catch (InterruptedException e) {
        // restore interrupt status
        Thread.currentThread().interrupt();
    }
    ClusterInfo clusterInfo = getClusterInfo();
    // tracks whether the loop below ran at least once; only used by the assertion after it
    boolean anyListeners = false;
    for (final Consumer<ClusterInfo> listener : listeners) {
        anyListeners = true;
        try {
            logger.trace("notifying [{}] of new cluster info", listener);
            listener.accept(clusterInfo);
        } catch (Exception e) {
            // a failing listener must not prevent the remaining listeners from being notified
            logger.info(new ParameterizedMessage("failed to notify [{}] of new cluster info", listener), e);
        }
    }
    assert anyListeners : "expected to notify at least one listener";
    return clusterInfo;
}
Also used : OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) ImmutableOpenMap(org.opensearch.common.collect.ImmutableOpenMap) AbstractRunnable(org.opensearch.common.util.concurrent.AbstractRunnable) ThreadPool(org.opensearch.threadpool.ThreadPool) Level(org.apache.logging.log4j.Level) HashMap(java.util.HashMap) IndicesOptions(org.opensearch.action.support.IndicesOptions) DiskThresholdSettings(org.opensearch.cluster.routing.allocation.DiskThresholdSettings) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) AtomicReference(java.util.concurrent.atomic.AtomicReference) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) LatchedActionListener(org.opensearch.action.LatchedActionListener) Property(org.opensearch.common.settings.Setting.Property) Map(java.util.Map) ActionListener(org.opensearch.action.ActionListener) ClusterSettings(org.opensearch.common.settings.ClusterSettings) StoreStats(org.opensearch.index.store.StoreStats) Client(org.opensearch.client.Client) Setting(org.opensearch.common.settings.Setting) TimeValue(org.opensearch.common.unit.TimeValue) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) Settings(org.opensearch.common.settings.Settings) IndicesStatsRequest(org.opensearch.action.admin.indices.stats.IndicesStatsRequest) ShardRouting(org.opensearch.cluster.routing.ShardRouting) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Logger(org.apache.logging.log4j.Logger) NodesStatsResponse(org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse) ClusterService(org.opensearch.cluster.service.ClusterService) NodeStats(org.opensearch.action.admin.cluster.node.stats.NodeStats) NodesStatsRequest(org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest) 
IndicesStatsResponse(org.opensearch.action.admin.indices.stats.IndicesStatsResponse) ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) LogManager(org.apache.logging.log4j.LogManager) ReceiveTimeoutTransportException(org.opensearch.transport.ReceiveTimeoutTransportException) FsInfo(org.opensearch.monitor.fs.FsInfo) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) IndicesStatsResponse(org.opensearch.action.admin.indices.stats.IndicesStatsResponse) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) ReceiveTimeoutTransportException(org.opensearch.transport.ReceiveTimeoutTransportException) NodesStatsResponse(org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse) ReceiveTimeoutTransportException(org.opensearch.transport.ReceiveTimeoutTransportException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ImmutableOpenMap(org.opensearch.common.collect.ImmutableOpenMap) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ShardRouting (org.opensearch.cluster.routing.ShardRouting)361 ClusterState (org.opensearch.cluster.ClusterState)172 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)135 ShardId (org.opensearch.index.shard.ShardId)110 TestShardRouting (org.opensearch.cluster.routing.TestShardRouting)100 IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable)93 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)85 RoutingTable (org.opensearch.cluster.routing.RoutingTable)84 Settings (org.opensearch.common.settings.Settings)83 Metadata (org.opensearch.cluster.metadata.Metadata)71 HashSet (java.util.HashSet)59 RoutingNode (org.opensearch.cluster.routing.RoutingNode)59 ArrayList (java.util.ArrayList)57 IOException (java.io.IOException)56 List (java.util.List)50 PlainActionFuture (org.opensearch.action.support.PlainActionFuture)50 Index (org.opensearch.index.Index)50 UnassignedInfo (org.opensearch.cluster.routing.UnassignedInfo)49 IndexShard (org.opensearch.index.shard.IndexShard)49 ActionListener (org.opensearch.action.ActionListener)45