use of org.apache.ignite.network.ClusterNode in project ignite-3 by apache.
the class TableManager method createTableLocally.
/**
 * Creates the node-local structures of a table and publishes the table.
 *
 * <p>The method creates the partitions store directory, obtains (or lazily creates and starts) the data region
 * configured for the table, starts the table storage and prepares one RAFT group per partition. Once every group
 * is ready, the table is put into the versioned "by name" and "by id" maps and a {@link TableEvent#CREATE} event
 * is fired. Any exception raised while publishing the table is reported through the same event instead of being
 * rethrown. The call blocks until that publishing step has finished.
 *
 * @param causalityToken Causality token.
 * @param name Table name.
 * @param tblId Table id.
 * @param assignment Affinity assignment; its size is used as the number of partitions.
 * @param schemaDesc Schema descriptor that is registered in the schema registry of the new table.
 */
private void createTableLocally(long causalityToken, String name, UUID tblId, List<List<ClusterNode>> assignment, SchemaDescriptor schemaDesc) {
    int partitions = assignment.size();

    List<CompletableFuture<RaftGroupService>> raftGroupFuts = new ArrayList<>();

    Path storageDir = partitionsStoreDir.resolve(name);

    try {
        Files.createDirectories(storageDir);
    } catch (IOException e) {
        throw new IgniteInternalException("Failed to create partitions store directory for " + name + ": " + e.getMessage(), e);
    }

    TableConfiguration tableCfg = tablesCfg.tables().get(name);

    // The data region is created and started only on first use; a region that fails to start is stopped again
    // and the start failure (with any stop failure attached as suppressed) is propagated.
    DataRegion dataRegion = dataRegions.computeIfAbsent(tableCfg.dataRegion().value(), regionName -> {
        DataRegion region = engine.createDataRegion(dataStorageCfg.regions().get(regionName));

        try {
            region.start();
        } catch (Exception e) {
            try {
                region.stop();
            } catch (Exception stopFailure) {
                e.addSuppressed(stopFailure);
            }

            throw e;
        }

        return region;
    });

    TableStorage tableStorage = engine.createTable(storageDir, tableCfg, dataRegion);

    tableStorage.start();

    for (int idx = 0; idx < partitions; idx++) {
        // Effectively final copy of the loop index for the listener factory below.
        int partitionIdx = idx;

        try {
            CompletableFuture<RaftGroupService> groupFut = raftMgr.prepareRaftGroup(
                    raftGroupName(tblId, idx),
                    assignment.get(idx),
                    () -> new PartitionListener(tblId, new VersionedRowStore(tableStorage.getOrCreatePartition(partitionIdx), txManager))
            );

            raftGroupFuts.add(groupFut);
        } catch (NodeStoppingException e) {
            throw new AssertionError("Loza was stopped before Table manager", e);
        }
    }

    // Executed once all RAFT groups are prepared: assembles the table and makes it visible.
    Runnable publishTable = () -> {
        try {
            Int2ObjectOpenHashMap<RaftGroupService> partitionMap = new Int2ObjectOpenHashMap<>(partitions);

            for (int partId = 0; partId < partitions; partId++) {
                CompletableFuture<RaftGroupService> groupFut = raftGroupFuts.get(partId);

                assert groupFut.isDone();

                partitionMap.put(partId, groupFut.join());
            }

            InternalTableImpl internalTable = new InternalTableImpl(name, tblId, partitionMap, partitions, netAddrResolver, txManager, tableStorage);

            // Both registry callbacks run under the busy lock so that they fail fast when the node is stopping.
            var schemaRegistry = new SchemaRegistryImpl(
                    ver -> {
                        if (!busyLock.enterBusy()) {
                            throw new IgniteException(new NodeStoppingException());
                        }

                        try {
                            return tableSchema(tblId, ver);
                        } finally {
                            busyLock.leaveBusy();
                        }
                    },
                    () -> {
                        if (!busyLock.enterBusy()) {
                            throw new IgniteException(new NodeStoppingException());
                        }

                        try {
                            return latestSchemaVersion(tblId);
                        } finally {
                            busyLock.leaveBusy();
                        }
                    }
            );

            schemaRegistry.onSchemaRegistered(schemaDesc);

            var table = new TableImpl(internalTable, schemaRegistry);

            // Copy-and-extend the "by name" map for this causality token.
            tablesVv.update(causalityToken, previous -> {
                var byName = previous == null ? new HashMap() : new HashMap<>(previous);

                byName.put(name, table);

                return byName;
            }, th -> {
                throw new IgniteInternalException(IgniteStringFormatter.format("Cannot create a table [name={}, id={}]", name, tblId), th);
            });

            // Copy-and-extend the "by id" map for this causality token.
            tablesByIdVv.update(causalityToken, previous -> {
                var byId = previous == null ? new HashMap() : new HashMap<>(previous);

                byId.put(tblId, table);

                return byId;
            }, th -> {
                throw new IgniteInternalException(IgniteStringFormatter.format("Cannot create a table [name={}, id={}]", name, tblId), th);
            });

            completeApiCreateFuture(table);

            fireEvent(TableEvent.CREATE, new TableEventParameters(causalityToken, table), null);
        } catch (Exception e) {
            fireEvent(TableEvent.CREATE, new TableEventParameters(causalityToken, tblId, name), e);
        }
    };

    CompletableFuture<Void> allGroupsReady = CompletableFuture.allOf(raftGroupFuts.toArray(CompletableFuture[]::new));

    allGroupsReady.thenRun(publishTable).join();
}
use of org.apache.ignite.network.ClusterNode in project ignite-3 by apache.
the class TableManager method updateAssignments.
/**
 * Recalculates the assignments of every configured table for the given set of nodes and waits until both the
 * configuration change and the resulting RAFT topology updates have completed. This approach has known issues
 * {@link Ignite#setBaseline(Set)}.
 *
 * @param clusterNodes Set of nodes for assignment.
 */
private void updateAssignments(Set<ClusterNode> clusterNodes) {
    CompletableFuture<Object> baselineDone = new CompletableFuture<>();

    List<Supplier<CompletableFuture<Void>>> topologyUpdates = new ArrayList<>();

    tablesCfg.tables()
            .change(tbls -> {
                // Start from an empty queue each time the closure runs, so tasks never accumulate across runs.
                topologyUpdates.clear();

                for (int tblIdx = 0; tblIdx < tbls.size(); tblIdx++) {
                    tbls.createOrUpdate(tbls.get(tblIdx).name(), tblChange -> {
                        ExtendedTableChange change = (ExtendedTableChange) tblChange;

                        byte[] storedAssignments = change.assignments();

                        List<List<ClusterNode>> newAssignments = AffinityUtils.calculateAssignments(clusterNodes, change.partitions(), change.replicas());

                        boolean unchanged = newAssignments.equals(ByteUtils.fromBytes(storedAssignments));

                        if (unchanged) {
                            return;
                        }

                        change.changeAssignments(ByteUtils.toBytes(newAssignments));

                        // Deferred: the RAFT topology is only touched after the configuration change went through.
                        topologyUpdates.add(() -> updateRaftTopology((List<List<ClusterNode>>) ByteUtils.fromBytes(storedAssignments), newAssignments, change.id()));
                    });
                }
            })
            .thenCompose(ignored -> {
                List<CompletableFuture<Void>> started = new ArrayList<>(topologyUpdates.size());

                for (Supplier<CompletableFuture<Void>> update : topologyUpdates) {
                    started.add(update.get());
                }

                return CompletableFuture.allOf(started.toArray(CompletableFuture[]::new));
            })
            .whenComplete((res, th) -> {
                if (th == null) {
                    baselineDone.complete(null);
                } else {
                    baselineDone.completeExceptionally(th);
                }
            });

    baselineDone.join();
}
use of org.apache.ignite.network.ClusterNode in project ignite-3 by apache.
the class TableManagerTest method mockManagersAndCreateTableWithDelay.
/**
 * Stubs the collaborating managers, creates a Table manager and creates a table through it. A configuration
 * listener registered here waits on the phaser (when one is supplied) every time the table appears in or
 * disappears from the tables configuration.
 *
 * @param tableDefinition Configuration schema for a table.
 * @param tblManagerFut Future for table manager.
 * @param phaser Phaser for the wait; may be {@code null}, in which case the listener never waits.
 * @return The created table.
 * @throws NodeStoppingException Declared because of the stubbed {@code prepareRaftGroup} call.
 */
@NotNull
private TableImpl mockManagersAndCreateTableWithDelay(TableDefinition tableDefinition, CompletableFuture<TableManager> tblManagerFut, Phaser phaser) throws NodeStoppingException {
    // Every prepared RAFT group is a fresh mock whose leader is a local peer.
    when(rm.prepareRaftGroup(any(), any(), any())).thenAnswer(invocation -> {
        RaftGroupService raftSvc = mock(RaftGroupService.class);

        when(raftSvc.leader()).thenReturn(new Peer(new NetworkAddress("localhost", 47500)));

        return CompletableFuture.completedFuture(raftSvc);
    });

    when(ts.getByAddress(any(NetworkAddress.class)))
            .thenReturn(new ClusterNode(UUID.randomUUID().toString(), "node0", new NetworkAddress("localhost", 47500)));

    // NOTE(review): both static mocks below are closed at the end of their try blocks, i.e. before the table
    // manager is created - confirm that this scoping is intended.
    try (MockedStatic<SchemaUtils> schemaUtilsMock = mockStatic(SchemaUtils.class)) {
        schemaUtilsMock.when(() -> SchemaUtils.prepareSchemaDescriptor(anyInt(), any())).thenReturn(mock(SchemaDescriptor.class));
    }

    try (MockedStatic<AffinityUtils> affinityUtilsMock = mockStatic(AffinityUtils.class)) {
        ArrayList<List<ClusterNode>> singleNodeAssignment = new ArrayList<>(PARTITIONS);

        for (int partIdx = 0; partIdx < PARTITIONS; partIdx++) {
            singleNodeAssignment.add(new ArrayList<>(Collections.singleton(node)));
        }

        affinityUtilsMock.when(() -> AffinityUtils.calculateAssignments(any(), anyInt(), anyInt())).thenReturn(singleNodeAssignment);
    }

    TableManager tableManager = createTableManager(tblManagerFut);

    final int tablesBeforeCreation = tableManager.tables().size();

    tblsCfg.tables().listen(ctx -> {
        String tblName = tableDefinition.canonicalName();

        var newTblCfg = ctx.newValue().get(tblName);
        var oldTblCfg = ctx.oldValue().get(tblName);

        boolean created = newTblCfg != null && oldTblCfg == null;
        boolean dropped = oldTblCfg != null && newTblCfg == null;

        // Only a create or a drop of this particular table is delayed by the phaser.
        if ((created || dropped) && phaser != null) {
            phaser.arriveAndAwaitAdvance();
        }

        return CompletableFuture.completedFuture(null);
    });

    TableImpl createdTable = (TableImpl) tableManager.createTable(
            tableDefinition.canonicalName(),
            tblCh -> SchemaConfigurationConverter.convert(tableDefinition, tblCh).changeReplicas(REPLICAS).changePartitions(PARTITIONS)
    );

    assertNotNull(createdTable);
    assertEquals(tablesBeforeCreation + 1, tableManager.tables().size());

    return createdTable;
}
use of org.apache.ignite.network.ClusterNode in project ignite-3 by apache.
the class RendezvousAffinityFunction method assignPartition.
/**
 * Returns collection of nodes for specified partition.
 *
 * <p>Nodes are ordered by a hash computed from the node hash and the partition number, and replicas are picked
 * from that order. When {@code exclNeighbors} is {@code true}, a node is skipped if it is a neighbor of an already
 * selected node and {@code nodeFilter} is not consulted at all; otherwise {@code nodeFilter} (if not
 * {@code null}) decides whether a node is accepted. If neighbor exclusion leaves the result short of replicas,
 * the remaining slots are filled from the same order ignoring the neighbor restriction, and a warning is logged
 * once.
 *
 * @param part Partition.
 * @param nodes Nodes.
 * @param replicas Number of partition replicas; {@link Integer#MAX_VALUE} selects the replicated assignment.
 * @param neighborhoodCache Neighborhood, keyed by node id; read only when {@code exclNeighbors} is {@code true}.
 * @param exclNeighbors If true neighbors are excluded, false otherwise.
 * @param nodeFilter Filter for nodes, tested with the candidate and the nodes selected so far; may be
 *      {@code null}.
 * @return Assignment. When {@code nodes} holds at most one element, {@code nodes} itself is returned.
 */
public static List<ClusterNode> assignPartition(int part, List<ClusterNode> nodes, int replicas, Map<String, Collection<ClusterNode>> neighborhoodCache, boolean exclNeighbors, BiPredicate<ClusterNode, List<ClusterNode>> nodeFilter) {
    if (nodes.size() <= 1) {
        return nodes;
    }

    IgniteBiTuple<Long, ClusterNode>[] hashArr = (IgniteBiTuple<Long, ClusterNode>[]) new IgniteBiTuple[nodes.size()];

    for (int i = 0; i < nodes.size(); i++) {
        ClusterNode node = nodes.get(i);

        Object nodeHash = resolveNodeHash(node);

        long hash = hash(nodeHash.hashCode(), part);

        hashArr[i] = new IgniteBiTuple<>(hash, node);
    }

    // Never larger than nodes.size().
    final int effectiveReplicas = replicas == Integer.MAX_VALUE ? nodes.size() : Math.min(replicas, nodes.size());

    Iterable<ClusterNode> sortedNodes = new LazyLinearSortedContainer(hashArr, effectiveReplicas);

    // REPLICATED cache case
    if (replicas == Integer.MAX_VALUE) {
        return replicatedAssign(nodes, sortedNodes);
    }

    Iterator<ClusterNode> it = sortedNodes.iterator();

    List<ClusterNode> res = new ArrayList<>(effectiveReplicas);

    Collection<ClusterNode> allNeighbors = new HashSet<>();

    ClusterNode first = it.next();

    res.add(first);

    if (exclNeighbors) {
        allNeighbors.addAll(neighborhoodCache.get(first.id()));
    }

    // Select the other replicas.
    if (replicas > 1) {
        while (it.hasNext() && res.size() < effectiveReplicas) {
            ClusterNode node = it.next();

            if (exclNeighbors) {
                if (!allNeighbors.contains(node)) {
                    res.add(node);

                    allNeighbors.addAll(neighborhoodCache.get(node.id()));
                }
            } else if (nodeFilter == null || nodeFilter.test(node, res)) {
                // Neighbor exclusion is off in this branch, so there is no neighborhood bookkeeping to do.
                res.add(node);
            }
        }
    }

    // effectiveReplicas <= nodes.size() always holds here, so only the shortfall and the flag need checking.
    if (res.size() < effectiveReplicas && exclNeighbors) {
        // Need to iterate again in case if there are no nodes which pass exclude neighbors replicas criteria.
        it = sortedNodes.iterator();

        // Skip the first node: it is already part of the result.
        it.next();

        while (it.hasNext() && res.size() < effectiveReplicas) {
            ClusterNode node = it.next();

            if (!res.contains(node)) {
                res.add(node);
            }
        }

        if (!exclNeighborsWarn) {
            LOG.warn("Affinity function excludeNeighbors property is ignored " + "because topology has no enough nodes to assign all replicas.");

            exclNeighborsWarn = true;
        }
    }

    assert res.size() <= effectiveReplicas;

    return res;
}
use of org.apache.ignite.network.ClusterNode in project ignite-3 by apache.
the class RendezvousAffinityFunctionTest method testPartitionDistribution.
/**
 * Assigns partitions over a prepared topology and checks that the number of partitions owned by every node
 * stays within {@code AFFINITY_DEVIATION_RATIO} of the ideal even share.
 */
@Test
public void testPartitionDistribution() {
    int nodes = 50;
    int parts = 10_000;
    int replicas = 4;

    List<ClusterNode> clusterNodes = prepareNetworkTopology(nodes);

    assertTrue(parts > nodes, "Partitions should be more that nodes");

    int ideal = (parts * replicas) / nodes;

    List<List<ClusterNode>> assignment = RendezvousAffinityFunction.assignPartitions(clusterNodes, parts, replicas, false, null);

    // Invert the assignment: node -> partitions it owns.
    HashMap<ClusterNode, ArrayList<Integer>> assignmentByNode = new HashMap<>(nodes);

    for (int part = 0; part < assignment.size(); part++) {
        for (ClusterNode owner : assignment.get(part)) {
            assignmentByNode.computeIfAbsent(owner, key -> new ArrayList<>()).add(part);
        }
    }

    for (ClusterNode node : clusterNodes) {
        ArrayList<Integer> nodeParts = assignmentByNode.get(node);

        assertNotNull(nodeParts);

        boolean withinDeviation = nodeParts.size() > ideal * (1 - AFFINITY_DEVIATION_RATIO)
                && nodeParts.size() < ideal * (1 + AFFINITY_DEVIATION_RATIO);

        String failureMsg = "Partition distribution is too far from ideal [node=" + node + ", size=" + nodeParts.size()
                + ", idealSize=" + ideal + ", parts=" + compact(nodeParts) + ']';

        assertTrue(withinDeviation, failureMsg);
    }
}
Aggregations