
Example 26 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

the class ReplicaShardAllocator method makeAllocationDecision.

@Override
public AllocateUnassignedDecision makeAllocationDecision(final ShardRouting unassignedShard, final RoutingAllocation allocation, final Logger logger) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    final RoutingNodes routingNodes = allocation.routingNodes();
    final boolean explain = allocation.debugDecision();
    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    Decision allocateDecision = result.v1();
    if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
        // only return early if we are not in explain mode, or we are in explain mode but we have not
        // yet attempted to fetch any shard data
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), result.v2() != null ? new ArrayList<>(result.v2().values()) : null);
    }
    AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        allocation.setHasPendingAsyncFetch();
        List<NodeAllocationResult> nodeDecisions = null;
        if (explain) {
            nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
        }
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
    }
    ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't " + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), new ArrayList<>(result.v2().values()));
    }
    assert primaryShard.currentNodeId() != null;
    final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
    final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, false, primaryNode, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
    if (allocateDecision.type() != Decision.Type.YES) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    } else if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before on NO
        Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        } else {
            logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we found a match
            return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
        }
    } else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
        // if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
        // unassigned due to a node leaving, so we delay allocation of this replica to see if the
        // node with the shard copy will rejoin so we can re-use the copy it has
        logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
        long remainingDelayMillis = 0L;
        long totalDelayMillis = 0L;
        if (explain) {
            UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
            Metadata metadata = allocation.metadata();
            IndexMetadata indexMetadata = metadata.index(unassignedShard.index());
            totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).getMillis();
            long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetadata.getSettings());
            remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
        }
        return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
    }
    return AllocateUnassignedDecision.NOT_TAKEN;
}
Also used : NodeStoreFilesMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) RoutingNodes(org.opensearch.cluster.routing.RoutingNodes) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) ArrayList(java.util.ArrayList) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) StoreFileMetadata(org.opensearch.index.store.StoreFileMetadata) TransportNodesListShardStoreMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata) Decision(org.opensearch.cluster.routing.allocation.decider.Decision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) RoutingNode(org.opensearch.cluster.routing.RoutingNode) ShardRouting(org.opensearch.cluster.routing.ShardRouting) HashMap(java.util.HashMap) Map(java.util.Map) NodeAllocationResult(org.opensearch.cluster.routing.allocation.NodeAllocationResult)
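
The IndexMetadata work in this allocator boils down to looking up the per-index delayed-allocation timeout from the index settings. Below is a minimal, standalone sketch of just that lookup; the class name, the index name "my-index", and the 2m timeout are made-up illustration values, not part of the OpenSearch source.

import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.routing.UnassignedInfo;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;

public final class DelayedAllocationSettingSketch {
    public static void main(String[] args) {
        // Hypothetical index metadata carrying a 2m delayed-allocation timeout.
        IndexMetadata indexMetadata = IndexMetadata.builder("my-index")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
                .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "2m"))
            .build();
        // Read the per-index setting the same way makeAllocationDecision does above.
        TimeValue totalDelay = UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings());
        System.out.println("total delay millis = " + totalDelay.millis());
    }
}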

Example 27 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

the class Gateway method performStateRecovery.

public void performStateRecovery(final GatewayStateRecoveredListener listener) throws GatewayException {
    final String[] nodesIds = clusterService.state().nodes().getMasterNodes().keys().toArray(String.class);
    logger.trace("performing state recovery from {}", Arrays.toString(nodesIds));
    final TransportNodesListGatewayMetaState.NodesGatewayMetaState nodesState = listGatewayMetaState.list(nodesIds, null).actionGet();
    final int requiredAllocation = 1;
    if (nodesState.hasFailures()) {
        for (final FailedNodeException failedNodeException : nodesState.failures()) {
            logger.warn("failed to fetch state from node", failedNodeException);
        }
    }
    final ObjectFloatHashMap<Index> indices = new ObjectFloatHashMap<>();
    Metadata electedGlobalState = null;
    int found = 0;
    for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
        if (nodeState.metadata() == null) {
            continue;
        }
        found++;
        if (electedGlobalState == null) {
            electedGlobalState = nodeState.metadata();
        } else if (nodeState.metadata().version() > electedGlobalState.version()) {
            electedGlobalState = nodeState.metadata();
        }
        for (final ObjectCursor<IndexMetadata> cursor : nodeState.metadata().indices().values()) {
            indices.addTo(cursor.value.getIndex(), 1);
        }
    }
    if (found < requiredAllocation) {
        listener.onFailure("found [" + found + "] metadata states, required [" + requiredAllocation + "]");
        return;
    }
    // update the global state, and clean the indices, we elect them in the next phase
    final Metadata.Builder metadataBuilder = Metadata.builder(electedGlobalState).removeAllIndices();
    assert !indices.containsKey(null);
    final Object[] keys = indices.keys;
    for (int i = 0; i < keys.length; i++) {
        if (keys[i] != null) {
            final Index index = (Index) keys[i];
            IndexMetadata electedIndexMetadata = null;
            int indexMetadataCount = 0;
            for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
                if (nodeState.metadata() == null) {
                    continue;
                }
                final IndexMetadata indexMetadata = nodeState.metadata().index(index);
                if (indexMetadata == null) {
                    continue;
                }
                if (electedIndexMetadata == null) {
                    electedIndexMetadata = indexMetadata;
                } else if (indexMetadata.getVersion() > electedIndexMetadata.getVersion()) {
                    electedIndexMetadata = indexMetadata;
                }
                indexMetadataCount++;
            }
            if (electedIndexMetadata != null) {
                if (indexMetadataCount < requiredAllocation) {
                    logger.debug("[{}] found [{}], required [{}], not adding", index, indexMetadataCount, requiredAllocation);
                }
                // TODO if this logging statement is correct then we are missing an else here
                metadataBuilder.put(electedIndexMetadata, false);
            }
        }
    }
    ClusterState recoveredState = Function.<ClusterState>identity().andThen(state -> ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(state, clusterService.getClusterSettings())).apply(ClusterState.builder(clusterService.getClusterName()).metadata(metadataBuilder).build());
    listener.onSuccess(recoveredState);
}
Also used : Arrays(java.util.Arrays) FailedNodeException(org.opensearch.action.FailedNodeException) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) Settings(org.opensearch.common.settings.Settings) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) Function(java.util.function.Function) ClusterState(org.opensearch.cluster.ClusterState) Logger(org.apache.logging.log4j.Logger) ClusterService(org.opensearch.cluster.service.ClusterService) LogManager(org.apache.logging.log4j.LogManager) ObjectFloatHashMap(com.carrotsearch.hppc.ObjectFloatHashMap)
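
The election loop above keeps, per index, the IndexMetadata copy with the highest version among the node-reported states. A hedged sketch of that comparison in isolation follows; the elect and copyAtVersion helpers and the index name "idx" are hypothetical, introduced only to show the version-based election.

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;

public final class ElectIndexMetadataSketch {
    // Keep the copy with the highest version, mirroring the loop in performStateRecovery.
    static IndexMetadata elect(List<IndexMetadata> copies) {
        return copies.stream().max(Comparator.comparingLong(IndexMetadata::getVersion)).orElse(null);
    }

    // Hypothetical helper: a minimal one-shard index at a given metadata version.
    private static IndexMetadata copyAtVersion(long version) {
        return IndexMetadata.builder("idx")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0))
            .version(version)
            .build();
    }

    public static void main(String[] args) {
        IndexMetadata elected = elect(Arrays.asList(copyAtVersion(3), copyAtVersion(7), copyAtVersion(5)));
        System.out.println("elected version = " + elected.getVersion());
    }
}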

Example 28 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

the class MetaStateService method loadFullState.

/**
 * Loads the full state, which includes both the global state and all the indices' metadata. <br>
 * When loading, the manifest file (represented by the {@link Manifest} class) is consulted to determine the proper generations. <br>
 * If there is no manifest file on disk, this method falls back to BWC mode, in which the latest generation of global and index
 * metadata is loaded. Note that there is currently no way to distinguish between the manifest file having been removed and the
 * manifest file never having been created; therefore this method always falls back to BWC mode when there is no manifest file.
 *
 * @return tuple of {@link Manifest} and {@link Metadata} with the global metadata and indices metadata. If there is no state on disk,
 * a meta state with globalGeneration -1 and empty metadata is returned.
 * @throws IOException if an IOException occurs while loading the files, or if metadata referenced by the manifest file cannot be found.
 */
public Tuple<Manifest, Metadata> loadFullState() throws IOException {
    final Manifest manifest = MANIFEST_FORMAT.loadLatestState(logger, namedXContentRegistry, nodeEnv.nodeDataPaths());
    if (manifest == null) {
        return loadFullStateBWC();
    }
    final Metadata.Builder metadataBuilder;
    if (manifest.isGlobalGenerationMissing()) {
        metadataBuilder = Metadata.builder();
    } else {
        final Metadata globalMetadata = METADATA_FORMAT.loadGeneration(logger, namedXContentRegistry, manifest.getGlobalGeneration(), nodeEnv.nodeDataPaths());
        if (globalMetadata != null) {
            metadataBuilder = Metadata.builder(globalMetadata);
        } else {
            throw new IOException("failed to find global metadata [generation: " + manifest.getGlobalGeneration() + "]");
        }
    }
    for (Map.Entry<Index, Long> entry : manifest.getIndexGenerations().entrySet()) {
        final Index index = entry.getKey();
        final long generation = entry.getValue();
        final String indexFolderName = index.getUUID();
        final IndexMetadata indexMetadata = INDEX_METADATA_FORMAT.loadGeneration(logger, namedXContentRegistry, generation, nodeEnv.resolveIndexFolder(indexFolderName));
        if (indexMetadata != null) {
            metadataBuilder.put(indexMetadata, false);
        } else {
            throw new IOException("failed to find metadata for existing index " + index.getName() + " [location: " + indexFolderName + ", generation: " + generation + "]");
        }
    }
    return new Tuple<>(manifest, metadataBuilder.build());
}
Also used : Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) IOException(java.io.IOException) Manifest(org.opensearch.cluster.metadata.Manifest) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(org.opensearch.common.collect.Tuple)
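
The IndexMetadata-specific step here is metadataBuilder.put(indexMetadata, false), where the boolean controls whether the builder bumps the version of the added index metadata. A minimal sketch under assumed values (the index name is invented):

import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.common.settings.Settings;

public final class MetadataAssemblySketch {
    public static void main(String[] args) {
        IndexMetadata indexMetadata = IndexMetadata.builder("logs-000001")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0))
            .build();
        // 'false' mirrors loadFullState: add the index without incrementing its metadata version.
        Metadata metadata = Metadata.builder().put(indexMetadata, false).build();
        System.out.println(metadata.hasIndex("logs-000001"));
    }
}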

Example 29 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

the class PersistedClusterStateService method loadOnDiskState.

private OnDiskState loadOnDiskState(Path dataPath, DirectoryReader reader) throws IOException {
    final IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    final SetOnce<Metadata.Builder> builderReference = new SetOnce<>();
    consumeFromType(searcher, GLOBAL_TYPE_NAME, bytes -> {
        final Metadata metadata = Metadata.Builder.fromXContent(XContentFactory.xContent(XContentType.SMILE).createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, bytes.bytes, bytes.offset, bytes.length));
        logger.trace("found global metadata with last-accepted term [{}]", metadata.coordinationMetadata().term());
        if (builderReference.get() != null) {
            throw new IllegalStateException("duplicate global metadata found in [" + dataPath + "]");
        }
        builderReference.set(Metadata.builder(metadata));
    });
    final Metadata.Builder builder = builderReference.get();
    if (builder == null) {
        throw new IllegalStateException("no global metadata found in [" + dataPath + "]");
    }
    logger.trace("got global metadata, now reading index metadata");
    final Set<String> indexUUIDs = new HashSet<>();
    consumeFromType(searcher, INDEX_TYPE_NAME, bytes -> {
        final IndexMetadata indexMetadata = IndexMetadata.fromXContent(XContentFactory.xContent(XContentType.SMILE).createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, bytes.bytes, bytes.offset, bytes.length));
        logger.trace("found index metadata for {}", indexMetadata.getIndex());
        if (indexUUIDs.add(indexMetadata.getIndexUUID()) == false) {
            throw new IllegalStateException("duplicate metadata found for " + indexMetadata.getIndex() + " in [" + dataPath + "]");
        }
        builder.put(indexMetadata, false);
    });
    final Map<String, String> userData = reader.getIndexCommit().getUserData();
    logger.trace("loaded metadata [{}] from [{}]", userData, reader.directory());
    assert userData.size() == COMMIT_DATA_SIZE : userData;
    assert userData.get(CURRENT_TERM_KEY) != null;
    assert userData.get(LAST_ACCEPTED_VERSION_KEY) != null;
    assert userData.get(NODE_ID_KEY) != null;
    assert userData.get(NODE_VERSION_KEY) != null;
    return new OnDiskState(userData.get(NODE_ID_KEY), dataPath, Long.parseLong(userData.get(CURRENT_TERM_KEY)), Long.parseLong(userData.get(LAST_ACCEPTED_VERSION_KEY)), builder.build());
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SetOnce(org.apache.lucene.util.SetOnce) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) Metadata(org.opensearch.cluster.metadata.Metadata) NodeMetadata(org.opensearch.env.NodeMetadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashSet(java.util.HashSet)
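
loadOnDiskState uses getIndexUUID() to reject on-disk state that contains the same index twice. The sketch below isolates that guard, leaving out the Lucene reader and SMILE parsing; the assemble and indexWithUuid helpers and the fixed UUID string are hypothetical.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.common.settings.Settings;

public final class DuplicateIndexGuardSketch {
    // Mirrors the duplicate check in loadOnDiskState: each index UUID may appear only once.
    static Metadata assemble(List<IndexMetadata> loaded) {
        Metadata.Builder builder = Metadata.builder();
        Set<String> seenUuids = new HashSet<>();
        for (IndexMetadata indexMetadata : loaded) {
            if (seenUuids.add(indexMetadata.getIndexUUID()) == false) {
                throw new IllegalStateException("duplicate metadata found for " + indexMetadata.getIndex());
            }
            builder.put(indexMetadata, false);
        }
        return builder.build();
    }

    // Hypothetical helper: a minimal one-shard index with a fixed UUID.
    private static IndexMetadata indexWithUuid(String uuid) {
        return IndexMetadata.builder("idx")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_INDEX_UUID, uuid)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0))
            .build();
    }

    public static void main(String[] args) {
        // Both copies share the UUID, so assemble(...) throws IllegalStateException.
        assemble(Arrays.asList(indexWithUuid("a-uuid"), indexWithUuid("a-uuid")));
    }
}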

Example 30 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

the class CreateIndexIT method testCreationDateGenerated.

public void testCreationDateGenerated() {
    long timeBeforeRequest = System.currentTimeMillis();
    prepareCreate("test").get();
    long timeAfterRequest = System.currentTimeMillis();
    ClusterStateResponse response = client().admin().cluster().prepareState().get();
    ClusterState state = response.getState();
    assertThat(state, notNullValue());
    Metadata metadata = state.getMetadata();
    assertThat(metadata, notNullValue());
    ImmutableOpenMap<String, IndexMetadata> indices = metadata.getIndices();
    assertThat(indices, notNullValue());
    assertThat(indices.size(), equalTo(1));
    IndexMetadata index = indices.get("test");
    assertThat(index, notNullValue());
    assertThat(index.getCreationDate(), allOf(lessThanOrEqualTo(timeAfterRequest), greaterThanOrEqualTo(timeBeforeRequest)));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) ClusterStateResponse(org.opensearch.action.admin.cluster.state.ClusterStateResponse) MappingMetadata(org.opensearch.cluster.metadata.MappingMetadata) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata)
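
The assertion at the end reads IndexMetadata.getCreationDate(), which is backed by the index.creation_date setting. A minimal sketch of setting and reading it back on a hand-built IndexMetadata (the index name and timestamp are illustrative):

import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;

public final class CreationDateSketch {
    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        IndexMetadata indexMetadata = IndexMetadata.builder("test")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_CREATION_DATE, now)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0))
            .build();
        // getCreationDate() reads the index.creation_date setting set above.
        System.out.println(indexMetadata.getCreationDate() == now);
    }
}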

Aggregations

IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata) 403
ClusterState (org.opensearch.cluster.ClusterState) 144
Metadata (org.opensearch.cluster.metadata.Metadata) 126
Settings (org.opensearch.common.settings.Settings) 125
IndexSettings (org.opensearch.index.IndexSettings) 87
Index (org.opensearch.index.Index) 80
ShardRouting (org.opensearch.cluster.routing.ShardRouting) 65
ShardId (org.opensearch.index.shard.ShardId) 61
DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode) 53
IOException (java.io.IOException) 49
ArrayList (java.util.ArrayList) 49
HashMap (java.util.HashMap) 49
HashSet (java.util.HashSet) 48
Matchers.containsString (org.hamcrest.Matchers.containsString) 47
IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable) 46
RoutingTable (org.opensearch.cluster.routing.RoutingTable) 46
Map (java.util.Map) 42
ClusterService (org.opensearch.cluster.service.ClusterService) 40
List (java.util.List) 38
ActionListener (org.opensearch.action.ActionListener) 35