Use of org.opensearch.cluster.metadata.IndexMetadata in the OpenSearch project by opensearch-project.
From the class ReplicaShardAllocator, method makeAllocationDecision:
@Override
public AllocateUnassignedDecision makeAllocationDecision(
    final ShardRouting unassignedShard,
    final RoutingAllocation allocation,
    final Logger logger
) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    final RoutingNodes routingNodes = allocation.routingNodes();
    final boolean explain = allocation.debugDecision();
    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    Decision allocateDecision = result.v1();
    if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
        // only return early if we are not in explain mode, or we are in explain mode but we have not
        // yet attempted to fetch any shard data
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        return AllocateUnassignedDecision.no(
            UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()),
            result.v2() != null ? new ArrayList<>(result.v2().values()) : null
        );
    }
    AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        allocation.setHasPendingAsyncFetch();
        List<NodeAllocationResult> nodeDecisions = null;
        if (explain) {
            nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
        }
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
    }
    ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't "
            + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        return AllocateUnassignedDecision.no(
            UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()),
            new ArrayList<>(result.v2().values())
        );
    }
    assert primaryShard.currentNodeId() != null;
    final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
    final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, false, primaryNode, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
    if (allocateDecision.type() != Decision.Type.YES) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    } else if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before on NO
        Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug(
                "[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store",
                unassignedShard.index(),
                unassignedShard.id(),
                unassignedShard,
                nodeWithHighestMatch.node()
            );
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        } else {
            logger.debug(
                "[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store",
                unassignedShard.index(),
                unassignedShard.id(),
                unassignedShard,
                nodeWithHighestMatch.node()
            );
            // we found a match
            return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
        }
    } else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
        // if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
        // unassigned due to a node leaving, so we delay allocation of this replica to see if the
        // node with the shard copy will rejoin so we can re-use the copy it has
        logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
        long remainingDelayMillis = 0L;
        long totalDelayMillis = 0L;
        if (explain) {
            UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
            Metadata metadata = allocation.metadata();
            IndexMetadata indexMetadata = metadata.index(unassignedShard.index());
            totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).getMillis();
            long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetadata.getSettings());
            remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
        }
        return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
    }
    return AllocateUnassignedDecision.NOT_TAKEN;
}
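For context, the delayed branch above derives the delay values from the index-scoped settings carried by IndexMetadata (the allocator statically imports the setting constant from UnassignedInfo). A minimal sketch of that lookup, using a hypothetical helper method introduced only for illustration:

import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.routing.UnassignedInfo;
import org.opensearch.common.unit.TimeValue;

// Hypothetical helper: reads the per-index delayed-allocation timeout from the
// settings held by IndexMetadata, mirroring the explain branch above.
static long totalDelayMillis(IndexMetadata indexMetadata) {
    TimeValue delay = UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings());
    return delay.getMillis();
}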
Use of org.opensearch.cluster.metadata.IndexMetadata in the OpenSearch project by opensearch-project.
From the class Gateway, method performStateRecovery:
public void performStateRecovery(final GatewayStateRecoveredListener listener) throws GatewayException {
    final String[] nodesIds = clusterService.state().nodes().getMasterNodes().keys().toArray(String.class);
    logger.trace("performing state recovery from {}", Arrays.toString(nodesIds));
    final TransportNodesListGatewayMetaState.NodesGatewayMetaState nodesState = listGatewayMetaState.list(nodesIds, null).actionGet();
    final int requiredAllocation = 1;
    if (nodesState.hasFailures()) {
        for (final FailedNodeException failedNodeException : nodesState.failures()) {
            logger.warn("failed to fetch state from node", failedNodeException);
        }
    }
    final ObjectFloatHashMap<Index> indices = new ObjectFloatHashMap<>();
    Metadata electedGlobalState = null;
    int found = 0;
    for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
        if (nodeState.metadata() == null) {
            continue;
        }
        found++;
        if (electedGlobalState == null) {
            electedGlobalState = nodeState.metadata();
        } else if (nodeState.metadata().version() > electedGlobalState.version()) {
            electedGlobalState = nodeState.metadata();
        }
        for (final ObjectCursor<IndexMetadata> cursor : nodeState.metadata().indices().values()) {
            indices.addTo(cursor.value.getIndex(), 1);
        }
    }
    if (found < requiredAllocation) {
        listener.onFailure("found [" + found + "] metadata states, required [" + requiredAllocation + "]");
        return;
    }
    // update the global state, and clean the indices, we elect them in the next phase
    final Metadata.Builder metadataBuilder = Metadata.builder(electedGlobalState).removeAllIndices();
    assert !indices.containsKey(null);
    final Object[] keys = indices.keys;
    for (int i = 0; i < keys.length; i++) {
        if (keys[i] != null) {
            final Index index = (Index) keys[i];
            IndexMetadata electedIndexMetadata = null;
            int indexMetadataCount = 0;
            for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
                if (nodeState.metadata() == null) {
                    continue;
                }
                final IndexMetadata indexMetadata = nodeState.metadata().index(index);
                if (indexMetadata == null) {
                    continue;
                }
                if (electedIndexMetadata == null) {
                    electedIndexMetadata = indexMetadata;
                } else if (indexMetadata.getVersion() > electedIndexMetadata.getVersion()) {
                    electedIndexMetadata = indexMetadata;
                }
                indexMetadataCount++;
            }
            if (electedIndexMetadata != null) {
                if (indexMetadataCount < requiredAllocation) {
                    logger.debug("[{}] found [{}], required [{}], not adding", index, indexMetadataCount, requiredAllocation);
                }
                // TODO if this logging statement is correct then we are missing an else here
                metadataBuilder.put(electedIndexMetadata, false);
            }
        }
    }
    ClusterState recoveredState = Function.<ClusterState>identity()
        .andThen(state -> ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(state, clusterService.getClusterSettings()))
        .apply(ClusterState.builder(clusterService.getClusterName()).metadata(metadataBuilder).build());
    listener.onSuccess(recoveredState);
}
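The per-index election above keeps the copy with the highest IndexMetadata version. The same rule can be written as a reduction over the candidate copies; the helper below is hypothetical and assumes the list has already been filtered for nulls:

import java.util.Comparator;
import java.util.List;
import org.opensearch.cluster.metadata.IndexMetadata;

// Hypothetical helper illustrating the election rule used above: among the copies
// of one index reported by the nodes, the copy with the highest metadata version wins.
static IndexMetadata electIndexMetadata(List<IndexMetadata> candidates) {
    return candidates.stream()
        .max(Comparator.comparingLong(IndexMetadata::getVersion))
        .orElse(null);
}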
Use of org.opensearch.cluster.metadata.IndexMetadata in the OpenSearch project by opensearch-project.
From the class MetaStateService, method loadFullState:
/**
 * Loads the full state, which includes both the global state and all the indices metadata. <br>
 * When loading, the manifest file (represented by the {@link Manifest} class) is consulted to load the proper generations. <br>
 * If there is no manifest file on disk, this method falls back to BWC mode, where the latest generation of global and indices
 * metadata is loaded. Note that there is currently no way to distinguish between the manifest file having been removed and the
 * manifest file never having been created, so this method always falls back to BWC mode when there is no manifest file.
 *
 * @return tuple of {@link Manifest} and {@link Metadata} with global metadata and indices metadata. If there is no state on disk,
 * a meta state with globalGeneration -1 and empty metadata is returned.
 * @throws IOException if an IOException occurs while loading files, or if there is no metadata referenced by the manifest file.
 */
public Tuple<Manifest, Metadata> loadFullState() throws IOException {
    final Manifest manifest = MANIFEST_FORMAT.loadLatestState(logger, namedXContentRegistry, nodeEnv.nodeDataPaths());
    if (manifest == null) {
        return loadFullStateBWC();
    }
    final Metadata.Builder metadataBuilder;
    if (manifest.isGlobalGenerationMissing()) {
        metadataBuilder = Metadata.builder();
    } else {
        final Metadata globalMetadata = METADATA_FORMAT.loadGeneration(
            logger,
            namedXContentRegistry,
            manifest.getGlobalGeneration(),
            nodeEnv.nodeDataPaths()
        );
        if (globalMetadata != null) {
            metadataBuilder = Metadata.builder(globalMetadata);
        } else {
            throw new IOException("failed to find global metadata [generation: " + manifest.getGlobalGeneration() + "]");
        }
    }
    for (Map.Entry<Index, Long> entry : manifest.getIndexGenerations().entrySet()) {
        final Index index = entry.getKey();
        final long generation = entry.getValue();
        final String indexFolderName = index.getUUID();
        final IndexMetadata indexMetadata = INDEX_METADATA_FORMAT.loadGeneration(
            logger,
            namedXContentRegistry,
            generation,
            nodeEnv.resolveIndexFolder(indexFolderName)
        );
        if (indexMetadata != null) {
            metadataBuilder.put(indexMetadata, false);
        } else {
            throw new IOException(
                "failed to find metadata for existing index " + index.getName() + " [location: " + indexFolderName + ", generation: " + generation + "]"
            );
        }
    }
    return new Tuple<>(manifest, metadataBuilder.build());
}
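A caller of loadFullState typically unpacks the returned tuple and walks the per-index metadata. A hedged usage sketch (the caller method below is hypothetical; loadFullState is a public method on MetaStateService):

import java.io.IOException;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.Manifest;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.common.collect.Tuple;
import org.opensearch.gateway.MetaStateService;

// Hypothetical caller, shown only to illustrate unpacking the tuple and iterating
// the IndexMetadata entries it carries.
static void printRecoveredIndices(MetaStateService metaStateService) throws IOException {
    Tuple<Manifest, Metadata> state = metaStateService.loadFullState();
    Metadata metadata = state.v2();
    for (ObjectCursor<IndexMetadata> cursor : metadata.indices().values()) {
        IndexMetadata indexMetadata = cursor.value;
        System.out.println(indexMetadata.getIndex() + " [metadata version: " + indexMetadata.getVersion() + "]");
    }
}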
Use of org.opensearch.cluster.metadata.IndexMetadata in the OpenSearch project by opensearch-project.
From the class PersistedClusterStateService, method loadOnDiskState:
private OnDiskState loadOnDiskState(Path dataPath, DirectoryReader reader) throws IOException {
    final IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    final SetOnce<Metadata.Builder> builderReference = new SetOnce<>();
    consumeFromType(searcher, GLOBAL_TYPE_NAME, bytes -> {
        final Metadata metadata = Metadata.Builder.fromXContent(
            XContentFactory.xContent(XContentType.SMILE)
                .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, bytes.bytes, bytes.offset, bytes.length)
        );
        logger.trace("found global metadata with last-accepted term [{}]", metadata.coordinationMetadata().term());
        if (builderReference.get() != null) {
            throw new IllegalStateException("duplicate global metadata found in [" + dataPath + "]");
        }
        builderReference.set(Metadata.builder(metadata));
    });
    final Metadata.Builder builder = builderReference.get();
    if (builder == null) {
        throw new IllegalStateException("no global metadata found in [" + dataPath + "]");
    }
    logger.trace("got global metadata, now reading index metadata");
    final Set<String> indexUUIDs = new HashSet<>();
    consumeFromType(searcher, INDEX_TYPE_NAME, bytes -> {
        final IndexMetadata indexMetadata = IndexMetadata.fromXContent(
            XContentFactory.xContent(XContentType.SMILE)
                .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, bytes.bytes, bytes.offset, bytes.length)
        );
        logger.trace("found index metadata for {}", indexMetadata.getIndex());
        if (indexUUIDs.add(indexMetadata.getIndexUUID()) == false) {
            throw new IllegalStateException("duplicate metadata found for " + indexMetadata.getIndex() + " in [" + dataPath + "]");
        }
        builder.put(indexMetadata, false);
    });
    final Map<String, String> userData = reader.getIndexCommit().getUserData();
    logger.trace("loaded metadata [{}] from [{}]", userData, reader.directory());
    assert userData.size() == COMMIT_DATA_SIZE : userData;
    assert userData.get(CURRENT_TERM_KEY) != null;
    assert userData.get(LAST_ACCEPTED_VERSION_KEY) != null;
    assert userData.get(NODE_ID_KEY) != null;
    assert userData.get(NODE_VERSION_KEY) != null;
    return new OnDiskState(
        userData.get(NODE_ID_KEY),
        dataPath,
        Long.parseLong(userData.get(CURRENT_TERM_KEY)),
        Long.parseLong(userData.get(LAST_ACCEPTED_VERSION_KEY)),
        builder.build()
    );
}
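The loader above deduplicates index documents by IndexMetadata#getIndexUUID. A purely illustrative, minimal IndexMetadata (index name and settings below are made up) shows where that UUID lives:

import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.UUIDs;
import org.opensearch.common.settings.Settings;

// Illustrative only: a minimal IndexMetadata whose UUID is the key the loader above
// uses to detect duplicate index documents on disk.
static IndexMetadata minimalIndexMetadata() {
    return IndexMetadata.builder("example-index")
        .settings(
            Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
        )
        .numberOfShards(1)
        .numberOfReplicas(0)
        .build();
}
// minimalIndexMetadata().getIndexUUID() would be the value added to the indexUUIDs set above.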
Use of org.opensearch.cluster.metadata.IndexMetadata in the OpenSearch project by opensearch-project.
From the class CreateIndexIT, method testCreationDateGenerated:
public void testCreationDateGenerated() {
    long timeBeforeRequest = System.currentTimeMillis();
    prepareCreate("test").get();
    long timeAfterRequest = System.currentTimeMillis();
    ClusterStateResponse response = client().admin().cluster().prepareState().get();
    ClusterState state = response.getState();
    assertThat(state, notNullValue());
    Metadata metadata = state.getMetadata();
    assertThat(metadata, notNullValue());
    ImmutableOpenMap<String, IndexMetadata> indices = metadata.getIndices();
    assertThat(indices, notNullValue());
    assertThat(indices.size(), equalTo(1));
    IndexMetadata index = indices.get("test");
    assertThat(index, notNullValue());
    assertThat(index.getCreationDate(), allOf(lessThanOrEqualTo(timeAfterRequest), greaterThanOrEqualTo(timeBeforeRequest)));
}
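The creation date asserted above is persisted as the index setting index.creation_date (IndexMetadata.SETTING_CREATION_DATE), so an equivalent check can read it back from the settings carried by IndexMetadata. A small sketch, reusing the index variable and Hamcrest matchers from the test above:

// Sketch: the creation date can equivalently be read from the index settings;
// getCreationDate() is backed by the "index.creation_date" setting.
long creationDateFromSettings = index.getSettings().getAsLong(IndexMetadata.SETTING_CREATION_DATE, -1L);
assertThat(creationDateFromSettings, equalTo(index.getCreationDate()));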