use of org.elasticsearch.index.shard.ShardId in project elasticsearch by elastic.
the class IndexMetaDataUpdater method updateInSyncAllocations.
/**
* Updates in-sync allocations with routing changes that were made to the routing table.
*/
private IndexMetaData.Builder updateInSyncAllocations(RoutingTable newRoutingTable, IndexMetaData oldIndexMetaData, IndexMetaData.Builder indexMetaDataBuilder, ShardId shardId, Updates updates) {
assert Sets.haveEmptyIntersection(updates.addedAllocationIds, updates.removedAllocationIds) : "allocation ids cannot be both added and removed in the same allocation round, added ids: " + updates.addedAllocationIds + ", removed ids: " + updates.removedAllocationIds;
Set<String> oldInSyncAllocationIds = oldIndexMetaData.inSyncAllocationIds(shardId.id());
// check if we have been force-initializing an empty primary or a stale primary
if (updates.initializedPrimary != null && oldInSyncAllocationIds.isEmpty() == false && oldInSyncAllocationIds.contains(updates.initializedPrimary.allocationId().getId()) == false) {
// we're not reusing an existing in-sync allocation id to initialize a primary, which means that we're either force-allocating
// an empty or a stale primary (see AllocateEmptyPrimaryAllocationCommand or AllocateStalePrimaryAllocationCommand).
RecoverySource.Type recoverySourceType = updates.initializedPrimary.recoverySource().getType();
boolean emptyPrimary = recoverySourceType == RecoverySource.Type.EMPTY_STORE;
assert updates.addedAllocationIds.isEmpty() : (emptyPrimary ? "empty" : "stale") + " primary is not force-initialized in same allocation round where shards are started";
if (indexMetaDataBuilder == null) {
indexMetaDataBuilder = IndexMetaData.builder(oldIndexMetaData);
}
if (emptyPrimary) {
// forcing an empty primary resets the in-sync allocations to the empty set (ShardRouting.allocatedPostIndexCreate)
indexMetaDataBuilder.putInSyncAllocationIds(shardId.id(), Collections.emptySet());
} else {
// forcing a stale primary resets the in-sync allocations to the singleton set with the stale id
indexMetaDataBuilder.putInSyncAllocationIds(shardId.id(), Collections.singleton(updates.initializedPrimary.allocationId().getId()));
}
} else {
// standard path for updating in-sync ids
Set<String> inSyncAllocationIds = new HashSet<>(oldInSyncAllocationIds);
inSyncAllocationIds.addAll(updates.addedAllocationIds);
inSyncAllocationIds.removeAll(updates.removedAllocationIds);
// Prevent set of inSyncAllocationIds to grow unboundedly. This can happen for example if we don't write to a primary
// but repeatedly shut down nodes that have active replicas.
// We use number_of_replicas + 1 (= possible active shard copies) to bound the inSyncAllocationIds set
// Only trim the set of allocation ids when it grows, otherwise we might trim too eagerly when the number
// of replicas was decreased while shards were unassigned.
// +1 for the primary
int maxActiveShards = oldIndexMetaData.getNumberOfReplicas() + 1;
IndexShardRoutingTable newShardRoutingTable = newRoutingTable.shardRoutingTable(shardId);
if (inSyncAllocationIds.size() > oldInSyncAllocationIds.size() && inSyncAllocationIds.size() > maxActiveShards) {
// trim entries that have no corresponding shard routing in the cluster state (i.e. trim unavailable copies)
List<ShardRouting> assignedShards = newShardRoutingTable.assignedShards();
assert assignedShards.size() <= maxActiveShards : "cannot have more assigned shards " + assignedShards + " than maximum possible active shards " + maxActiveShards;
Set<String> assignedAllocations = assignedShards.stream().map(s -> s.allocationId().getId()).collect(Collectors.toSet());
inSyncAllocationIds = inSyncAllocationIds.stream().sorted(// values with routing entries first
Comparator.comparing(assignedAllocations::contains).reversed()).limit(maxActiveShards).collect(Collectors.toSet());
}
// in-sync set, this could create an empty primary on the next allocation.
if (newShardRoutingTable.activeShards().isEmpty() && updates.firstFailedPrimary != null) {
// add back allocation id of failed primary
inSyncAllocationIds.add(updates.firstFailedPrimary.allocationId().getId());
}
assert inSyncAllocationIds.isEmpty() == false || oldInSyncAllocationIds.isEmpty() : "in-sync allocations cannot become empty after they have been non-empty: " + oldInSyncAllocationIds;
// be extra safe here and only update in-sync set if it is non-empty
if (inSyncAllocationIds.isEmpty() == false) {
if (indexMetaDataBuilder == null) {
indexMetaDataBuilder = IndexMetaData.builder(oldIndexMetaData);
}
indexMetaDataBuilder.putInSyncAllocationIds(shardId.id(), inSyncAllocationIds);
}
}
return indexMetaDataBuilder;
}
use of org.elasticsearch.index.shard.ShardId in project elasticsearch by elastic.
the class IndexMetaDataUpdater method removeStaleIdsWithoutRoutings.
/**
* Removes allocation ids from the in-sync set for shard copies for which there is no routing entries in the routing table.
* This method is called in AllocationService before any changes to the routing table are made.
*/
public static ClusterState removeStaleIdsWithoutRoutings(ClusterState clusterState, List<StaleShard> staleShards) {
MetaData oldMetaData = clusterState.metaData();
RoutingTable oldRoutingTable = clusterState.routingTable();
MetaData.Builder metaDataBuilder = null;
// group staleShards entries by index
for (Map.Entry<Index, List<StaleShard>> indexEntry : staleShards.stream().collect(Collectors.groupingBy(fs -> fs.getShardId().getIndex())).entrySet()) {
final IndexMetaData oldIndexMetaData = oldMetaData.getIndexSafe(indexEntry.getKey());
IndexMetaData.Builder indexMetaDataBuilder = null;
// group staleShards entries by shard id
for (Map.Entry<ShardId, List<StaleShard>> shardEntry : indexEntry.getValue().stream().collect(Collectors.groupingBy(staleShard -> staleShard.getShardId())).entrySet()) {
int shardNumber = shardEntry.getKey().getId();
Set<String> oldInSyncAllocations = oldIndexMetaData.inSyncAllocationIds(shardNumber);
Set<String> idsToRemove = shardEntry.getValue().stream().map(e -> e.getAllocationId()).collect(Collectors.toSet());
assert idsToRemove.stream().allMatch(id -> oldRoutingTable.getByAllocationId(shardEntry.getKey(), id) == null) : "removing stale ids: " + idsToRemove + ", some of which have still a routing entry: " + oldRoutingTable;
Set<String> remainingInSyncAllocations = Sets.difference(oldInSyncAllocations, idsToRemove);
assert remainingInSyncAllocations.isEmpty() == false : "Set of in-sync ids cannot become empty for shard " + shardEntry.getKey() + " (before: " + oldInSyncAllocations + ", ids to remove: " + idsToRemove + ")";
// (see ShardRouting#allocatedPostIndexCreate)
if (remainingInSyncAllocations.isEmpty() == false) {
if (indexMetaDataBuilder == null) {
indexMetaDataBuilder = IndexMetaData.builder(oldIndexMetaData);
}
indexMetaDataBuilder.putInSyncAllocationIds(shardNumber, remainingInSyncAllocations);
}
}
if (indexMetaDataBuilder != null) {
if (metaDataBuilder == null) {
metaDataBuilder = MetaData.builder(oldMetaData);
}
metaDataBuilder.put(indexMetaDataBuilder);
}
}
if (metaDataBuilder != null) {
return ClusterState.builder(clusterState).metaData(metaDataBuilder).build();
} else {
return clusterState;
}
}
use of org.elasticsearch.index.shard.ShardId in project elasticsearch by elastic.
the class IndexMetaDataUpdater method applyChanges.
/**
* Updates the current {@link MetaData} based on the changes of this RoutingChangesObserver. Specifically
* we update {@link IndexMetaData#getInSyncAllocationIds()} and {@link IndexMetaData#primaryTerm(int)} based on
* the changes made during this allocation.
*
* @param oldMetaData {@link MetaData} object from before the routing nodes was changed.
* @param newRoutingTable {@link RoutingTable} object after routing changes were applied.
* @return adapted {@link MetaData}, potentially the original one if no change was needed.
*/
public MetaData applyChanges(MetaData oldMetaData, RoutingTable newRoutingTable) {
Map<Index, List<Map.Entry<ShardId, Updates>>> changesGroupedByIndex = shardChanges.entrySet().stream().collect(Collectors.groupingBy(e -> e.getKey().getIndex()));
MetaData.Builder metaDataBuilder = null;
for (Map.Entry<Index, List<Map.Entry<ShardId, Updates>>> indexChanges : changesGroupedByIndex.entrySet()) {
Index index = indexChanges.getKey();
final IndexMetaData oldIndexMetaData = oldMetaData.getIndexSafe(index);
IndexMetaData.Builder indexMetaDataBuilder = null;
for (Map.Entry<ShardId, Updates> shardEntry : indexChanges.getValue()) {
ShardId shardId = shardEntry.getKey();
Updates updates = shardEntry.getValue();
indexMetaDataBuilder = updateInSyncAllocations(newRoutingTable, oldIndexMetaData, indexMetaDataBuilder, shardId, updates);
indexMetaDataBuilder = updatePrimaryTerm(oldIndexMetaData, indexMetaDataBuilder, shardId, updates);
}
if (indexMetaDataBuilder != null) {
if (metaDataBuilder == null) {
metaDataBuilder = MetaData.builder(oldMetaData);
}
metaDataBuilder.put(indexMetaDataBuilder);
}
}
if (metaDataBuilder != null) {
return metaDataBuilder.build();
} else {
return oldMetaData;
}
}
use of org.elasticsearch.index.shard.ShardId in project elasticsearch by elastic.
the class RoutingNodes method assertShardStats.
/**
* Calculates RoutingNodes statistics by iterating over all {@link ShardRouting}s
* in the cluster to ensure the book-keeping is correct.
* For performance reasons, this should only be called from asserts
*
* @return this method always returns <code>true</code> or throws an assertion error. If assertion are not enabled
* this method does nothing.
*/
public static boolean assertShardStats(RoutingNodes routingNodes) {
boolean run = false;
// only run if assertions are enabled!
assert (run = true);
if (!run) {
return true;
}
int unassignedPrimaryCount = 0;
int unassignedIgnoredPrimaryCount = 0;
int inactivePrimaryCount = 0;
int inactiveShardCount = 0;
int relocating = 0;
Map<Index, Integer> indicesAndShards = new HashMap<>();
for (RoutingNode node : routingNodes) {
for (ShardRouting shard : node) {
if (shard.initializing() && shard.relocatingNodeId() == null) {
inactiveShardCount++;
if (shard.primary()) {
inactivePrimaryCount++;
}
}
if (shard.relocating()) {
relocating++;
}
Integer i = indicesAndShards.get(shard.index());
if (i == null) {
i = shard.id();
}
indicesAndShards.put(shard.index(), Math.max(i, shard.id()));
}
}
// Assert that the active shard routing are identical.
Set<Map.Entry<Index, Integer>> entries = indicesAndShards.entrySet();
final List<ShardRouting> shards = new ArrayList<>();
for (Map.Entry<Index, Integer> e : entries) {
Index index = e.getKey();
for (int i = 0; i < e.getValue(); i++) {
for (RoutingNode routingNode : routingNodes) {
for (ShardRouting shardRouting : routingNode) {
if (shardRouting.index().equals(index) && shardRouting.id() == i) {
shards.add(shardRouting);
}
}
}
List<ShardRouting> mutableShardRoutings = routingNodes.assignedShards(new ShardId(index, i));
assert mutableShardRoutings.size() == shards.size();
for (ShardRouting r : mutableShardRoutings) {
assert shards.contains(r);
shards.remove(r);
}
assert shards.isEmpty();
}
}
for (ShardRouting shard : routingNodes.unassigned()) {
if (shard.primary()) {
unassignedPrimaryCount++;
}
}
for (ShardRouting shard : routingNodes.unassigned().ignored()) {
if (shard.primary()) {
unassignedIgnoredPrimaryCount++;
}
}
for (Map.Entry<String, Recoveries> recoveries : routingNodes.recoveriesPerNode.entrySet()) {
String node = recoveries.getKey();
final Recoveries value = recoveries.getValue();
int incoming = 0;
int outgoing = 0;
RoutingNode routingNode = routingNodes.nodesToShards.get(node);
if (routingNode != null) {
// node might have dropped out of the cluster
for (ShardRouting routing : routingNode) {
if (routing.initializing()) {
incoming++;
}
if (routing.primary() && routing.isRelocationTarget() == false) {
for (ShardRouting assigned : routingNodes.assignedShards.get(routing.shardId())) {
if (assigned.initializing() && assigned.recoverySource().getType() == RecoverySource.Type.PEER) {
outgoing++;
}
}
}
}
}
assert incoming == value.incoming : incoming + " != " + value.incoming + " node: " + routingNode;
assert outgoing == value.outgoing : outgoing + " != " + value.outgoing + " node: " + routingNode;
}
assert unassignedPrimaryCount == routingNodes.unassignedShards.getNumPrimaries() : "Unassigned primaries is [" + unassignedPrimaryCount + "] but RoutingNodes returned unassigned primaries [" + routingNodes.unassigned().getNumPrimaries() + "]";
assert unassignedIgnoredPrimaryCount == routingNodes.unassignedShards.getNumIgnoredPrimaries() : "Unassigned ignored primaries is [" + unassignedIgnoredPrimaryCount + "] but RoutingNodes returned unassigned ignored primaries [" + routingNodes.unassigned().getNumIgnoredPrimaries() + "]";
assert inactivePrimaryCount == routingNodes.inactivePrimaryCount : "Inactive Primary count [" + inactivePrimaryCount + "] but RoutingNodes returned inactive primaries [" + routingNodes.inactivePrimaryCount + "]";
assert inactiveShardCount == routingNodes.inactiveShardCount : "Inactive Shard count [" + inactiveShardCount + "] but RoutingNodes returned inactive shards [" + routingNodes.inactiveShardCount + "]";
assert routingNodes.getRelocatingShardCount() == relocating : "Relocating shards mismatch [" + routingNodes.getRelocatingShardCount() + "] but expected [" + relocating + "]";
return true;
}
use of org.elasticsearch.index.shard.ShardId in project elasticsearch by elastic.
the class SnapshotsInProgress method writeTo.
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(entries.size());
for (Entry entry : entries) {
entry.snapshot().writeTo(out);
out.writeBoolean(entry.includeGlobalState());
out.writeBoolean(entry.partial());
out.writeByte(entry.state().value());
out.writeVInt(entry.indices().size());
for (IndexId index : entry.indices()) {
index.writeTo(out);
}
out.writeLong(entry.startTime());
out.writeVInt(entry.shards().size());
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : entry.shards()) {
shardEntry.key.writeTo(out);
out.writeOptionalString(shardEntry.value.nodeId());
out.writeByte(shardEntry.value.state().value());
}
if (out.getVersion().onOrAfter(REPOSITORY_ID_INTRODUCED_VERSION)) {
out.writeLong(entry.repositoryStateId);
}
}
}
Aggregations