use of org.apache.ignite.internal.raft.Loza in project ignite-3 by apache.
the class MetaStorageManager method start.
/**
 * {@inheritDoc}
 *
 * <p>When the node configuration lists meta storage node names, the local storage is started, the meta storage
 * raft group is prepared on the topology members with those names and the meta storage service future is derived
 * from the raft group service future. When no names are configured, only a new, not yet completed service future
 * is created.
 */
@Override
public void start() {
    String[] metastorageNodes = this.locCfgMgr.configurationRegistry()
            .getConfiguration(NodeConfiguration.KEY)
            .metastorageNodes()
            .value();

    if (metastorageNodes.length == 0) {
        this.metaStorageSvcFut = new CompletableFuture<>();
    } else {
        metaStorageNodesOnStart = true;

        List<String> configuredNames = Arrays.asList(metastorageNodes);

        List<ClusterNode> metaStorageMembers = clusterNetSvc.topologyService().allMembers().stream()
                .filter(node -> configuredNames.contains(node.name()))
                .collect(Collectors.toList());

        // Fail with a readable error when no member of the current topology carries one of the configured names.
        // This will be rewritten in init phase https://issues.apache.org/jira/browse/IGNITE-15114
        if (metaStorageMembers.isEmpty()) {
            throw new IgniteException("Cannot start meta storage manager because there is no node in the cluster that hosts meta storage.");
        }

        // More than one configured meta storage node is rejected.
        // This will be rewritten in init phase https://issues.apache.org/jira/browse/IGNITE-15114
        if (metastorageNodes.length > 1) {
            throw new IgniteException("Cannot start meta storage manager because it is not allowed to start several metastorage nodes.");
        }

        storage.start();

        try {
            raftGroupServiceFut = raftMgr.prepareRaftGroup(
                    METASTORAGE_RAFT_GROUP_NAME,
                    metaStorageMembers,
                    () -> new MetaStorageListener(storage)
            );
        } catch (NodeStoppingException e) {
            throw new AssertionError("Loza was stopped before Meta Storage manager", e);
        }

        this.metaStorageSvcFut = raftGroupServiceFut.thenApply(
                raftSvc -> new MetaStorageServiceImpl(raftSvc, clusterNetSvc.topologyService().localMember().id())
        );

        if (hasMetastorageLocally(locCfgMgr)) {
            // Close the cursors associated with a member once that member disappears from the topology.
            clusterNetSvc.topologyService().addEventHandler(new TopologyEventHandler() {
                @Override
                public void onAppeared(ClusterNode member) {
                    // No-op.
                }

                @Override
                public void onDisappeared(ClusterNode member) {
                    metaStorageSvcFut.thenCompose(svc -> svc.closeCursors(member.id()));
                }
            });
        }
    }

    // TODO: IGNITE-15114 Cluster initialization flow. Here we should complete metaStorageServiceFuture.
    // clusterNetSvc.messagingService().addMessageHandler((message, senderAddr, correlationId) -> {});
}
use of org.apache.ignite.internal.raft.Loza in project ignite-3 by apache.
the class TableManager method createTableLocally.
/**
 * Creates local structures for a table: the partitions store directory, the data region (created and started on
 * first use), the table storage, one raft group per partition and finally the table object itself, which is
 * published through the versioned table maps and announced with a {@code TableEvent.CREATE} event.
 *
 * <p>The method blocks until the stage that runs after all raft group futures has finished.
 *
 * @param causalityToken Causality token.
 * @param name Table name.
 * @param tblId Table id.
 * @param assignment Affinity assignment, one list of nodes per partition.
 * @param schemaDesc Schema descriptor that is registered in the schema registry of the created table.
 */
private void createTableLocally(long causalityToken, String name, UUID tblId, List<List<ClusterNode>> assignment, SchemaDescriptor schemaDesc) {
    int partitions = assignment.size();

    List<CompletableFuture<RaftGroupService>> raftGroupFutures = new ArrayList<>();

    Path tableDir = partitionsStoreDir.resolve(name);

    try {
        Files.createDirectories(tableDir);
    } catch (IOException e) {
        throw new IgniteInternalException("Failed to create partitions store directory for " + name + ": " + e.getMessage(), e);
    }

    TableConfiguration tableCfg = tablesCfg.tables().get(name);

    // The data region is created and started only if it is not registered under its name yet.
    DataRegion dataRegion = dataRegions.computeIfAbsent(tableCfg.dataRegion().value(), regionName -> {
        DataRegion region = engine.createDataRegion(dataStorageCfg.regions().get(regionName));

        try {
            region.start();
        } catch (Exception startFailure) {
            // Try to stop the half-started region; a failure to stop is attached to the original error.
            try {
                region.stop();
            } catch (Exception stopFailure) {
                startFailure.addSuppressed(stopFailure);
            }

            throw startFailure;
        }

        return region;
    });

    TableStorage tableStorage = engine.createTable(tableDir, tableCfg, dataRegion);

    tableStorage.start();

    for (int p = 0; p < partitions; p++) {
        final int listenerPartId = p;

        try {
            raftGroupFutures.add(
                    raftMgr.prepareRaftGroup(
                            raftGroupName(tblId, p),
                            assignment.get(p),
                            () -> new PartitionListener(
                                    tblId,
                                    new VersionedRowStore(tableStorage.getOrCreatePartition(listenerPartId), txManager)
                            )
                    )
            );
        } catch (NodeStoppingException e) {
            throw new AssertionError("Loza was stopped before Table manager", e);
        }
    }

    CompletableFuture<Void> allGroupsReady = CompletableFuture.allOf(raftGroupFutures.toArray(CompletableFuture[]::new));

    allGroupsReady.thenRun(() -> {
        try {
            Int2ObjectOpenHashMap<RaftGroupService> partitionMap = new Int2ObjectOpenHashMap<>(partitions);

            for (int idx = 0; idx < partitions; idx++) {
                CompletableFuture<RaftGroupService> groupFuture = raftGroupFutures.get(idx);

                assert groupFuture.isDone();

                partitionMap.put(idx, groupFuture.join());
            }

            InternalTableImpl internalTable = new InternalTableImpl(name, tblId, partitionMap, partitions, netAddrResolver, txManager, tableStorage);

            // Both schema lookups run under the busy lock and fail when the lock cannot be entered.
            var schemaRegistry = new SchemaRegistryImpl(ver -> {
                if (!busyLock.enterBusy()) {
                    throw new IgniteException(new NodeStoppingException());
                }

                try {
                    return tableSchema(tblId, ver);
                } finally {
                    busyLock.leaveBusy();
                }
            }, () -> {
                if (!busyLock.enterBusy()) {
                    throw new IgniteException(new NodeStoppingException());
                }

                try {
                    return latestSchemaVersion(tblId);
                } finally {
                    busyLock.leaveBusy();
                }
            });

            schemaRegistry.onSchemaRegistered(schemaDesc);

            var table = new TableImpl(internalTable, schemaRegistry);

            // Publish the table by name: the previous map is copied, never modified in place.
            tablesVv.update(causalityToken, prev -> {
                var byName = prev == null ? new HashMap() : new HashMap<>(prev);

                byName.put(name, table);

                return byName;
            }, err -> {
                throw new IgniteInternalException(IgniteStringFormatter.format("Cannot create a table [name={}, id={}]", name, tblId), err);
            });

            // Publish the table by id in the same copy-and-extend manner.
            tablesByIdVv.update(causalityToken, prev -> {
                var byId = prev == null ? new HashMap() : new HashMap<>(prev);

                byId.put(tblId, table);

                return byId;
            }, err -> {
                throw new IgniteInternalException(IgniteStringFormatter.format("Cannot create a table [name={}, id={}]", name, tblId), err);
            });

            completeApiCreateFuture(table);

            fireEvent(TableEvent.CREATE, new TableEventParameters(causalityToken, table), null);
        } catch (Exception e) {
            // Any failure while assembling the table is reported to the listeners of the CREATE event.
            fireEvent(TableEvent.CREATE, new TableEventParameters(causalityToken, tblId, name), e);
        }
    }).join();
}
use of org.apache.ignite.internal.raft.Loza in project ignite-3 by apache.
the class TableManager method start.
/**
* {@inheritDoc}
*
* <p>Subscribes to changes of the table configuration named list (create, rename and delete of elements), then
* starts the storage engine and creates, registers and starts the default data region.
*/
@Override
public void start() {
tablesCfg.tables().listenElements(new ConfigurationNamedListListener<>() {
@Override
public CompletableFuture<?> onCreate(ConfigurationNotificationEvent<TableView> ctx) {
// The busy lock could not be entered: report the failed creation to the event listeners
// and return a future failed with NodeStoppingException.
if (!busyLock.enterBusy()) {
String tblName = ctx.newValue().name();
UUID tblId = ((ExtendedTableView) ctx.newValue()).id();
fireEvent(TableEvent.CREATE, new TableEventParameters(ctx.storageRevision(), tblId, tblName), new NodeStoppingException());
return CompletableFuture.failedFuture(new NodeStoppingException());
}
try {
onTableCreateInternal(ctx);
} finally {
busyLock.leaveBusy();
}
return CompletableFuture.completedFuture(null);
}
/**
* Handles a table creation configuration event: subscribes to schema and assignment changes of the new table
* and then creates the table locally.
*
* @param ctx Configuration event.
*/
private void onTableCreateInternal(ConfigurationNotificationEvent<TableView> ctx) {
String tblName = ctx.newValue().name();
UUID tblId = ((ExtendedTableView) ctx.newValue()).id();
// Assignments must be present at this point.
// NOTE(review): the original comment here was truncated ("... configuration, which is not supported now.");
// presumably missing assignments could only come from a configuration path that is not supported now - confirm.
assert ((ExtendedTableView) ctx.newValue()).assignments() != null : IgniteStringFormatter.format("Table [id={}, name={}] has empty assignments.", tblId, tblName);
// TODO: IGNITE-16369 Listener with any placeholder should be used instead.
((ExtendedTableConfiguration) tablesCfg.tables().get(tblName)).schemas().listenElements(new ConfigurationNamedListListener<>() {
@Override
public CompletableFuture<?> onCreate(ConfigurationNotificationEvent<SchemaView> schemasCtx) {
long causalityToken = schemasCtx.storageRevision();
// The busy lock could not be entered: report a failed ALTER event and fail the returned future.
if (!busyLock.enterBusy()) {
fireEvent(TableEvent.ALTER, new TableEventParameters(causalityToken, tblId, tblName), new NodeStoppingException());
return CompletableFuture.failedFuture(new NodeStoppingException());
}
try {
// FIXME: https://issues.apache.org/jira/browse/IGNITE-16369
// Only a schema notification whose storage revision differs from the revision of the table creation
// event is processed: the deserialized schema is registered in the table's schema registry and an ALTER
// event is fired once the future returned by tablesByIdVv.get(causalityToken) completes.
if (ctx.storageRevision() != schemasCtx.storageRevision()) {
return tablesByIdVv.get(causalityToken).thenAccept(tablesById -> {
TableImpl table = tablesById.get(tblId);
((SchemaRegistryImpl) table.schemaView()).onSchemaRegistered(SchemaSerializerImpl.INSTANCE.deserialize((schemasCtx.newValue().schema())));
fireEvent(TableEvent.ALTER, new TableEventParameters(causalityToken, table), null);
});
}
return CompletableFuture.completedFuture(null);
} catch (Exception e) {
fireEvent(TableEvent.ALTER, new TableEventParameters(causalityToken, tblId, tblName), e);
return CompletableFuture.failedFuture(e);
} finally {
busyLock.leaveBusy();
}
}
});
((ExtendedTableConfiguration) tablesCfg.tables().get(tblName)).assignments().listen(assignmentsCtx -> {
if (!busyLock.enterBusy()) {
return CompletableFuture.failedFuture(new NodeStoppingException());
}
try {
// FIXME: https://issues.apache.org/jira/browse/IGNITE-16369
// A notification with the same storage revision as the table creation event is skipped;
// any other notification triggers an update of the raft groups.
if (ctx.storageRevision() == assignmentsCtx.storageRevision()) {
return CompletableFuture.completedFuture(null);
} else {
return updateAssignmentInternal(assignmentsCtx.storageRevision(), tblId, assignmentsCtx);
}
} finally {
busyLock.leaveBusy();
}
});
// Create the table from the deserialized assignments and the schema stored under INITIAL_SCHEMA_VERSION
// in the new configuration value.
createTableLocally(ctx.storageRevision(), tblName, tblId, (List<List<ClusterNode>>) ByteUtils.fromBytes(((ExtendedTableView) ctx.newValue()).assignments()), SchemaSerializerImpl.INSTANCE.deserialize(((ExtendedTableView) ctx.newValue()).schemas().get(String.valueOf(INITIAL_SCHEMA_VERSION)).schema()));
}
/**
* Updates the raft groups of a table after its assignments have changed.
*
* @param causalityToken Causality token used to obtain the map of tables by id.
* @param tblId Table id.
* @param assignmentsCtx Configuration event carrying the serialized old and new assignments.
* @return Future that completes when the update futures of all partitions have completed.
*/
private CompletableFuture<?> updateAssignmentInternal(long causalityToken, UUID tblId, ConfigurationNotificationEvent<byte[]> assignmentsCtx) {
List<List<ClusterNode>> oldAssignments = (List<List<ClusterNode>>) ByteUtils.fromBytes(assignmentsCtx.oldValue());
List<List<ClusterNode>> newAssignments = (List<List<ClusterNode>>) ByteUtils.fromBytes(assignmentsCtx.newValue());
CompletableFuture<?>[] futures = new CompletableFuture<?>[oldAssignments.size()];
// TODO: NOTE(review) the original TODO text was truncated. The loop below iterates over the old assignments and
// reads the new assignments by the same index, so it relies on there being the exact same amount of partitions
// and replicas for both old and new assignments.
for (int i = 0; i < oldAssignments.size(); i++) {
int partId = i;
List<ClusterNode> oldPartitionAssignment = oldAssignments.get(partId);
List<ClusterNode> newPartitionAssignment = newAssignments.get(partId);
// Nodes that are present in the new partition assignment but absent from the old one.
var toAdd = new HashSet<>(newPartitionAssignment);
toAdd.removeAll(oldPartitionAssignment);
// Create new raft nodes according to new assignments.
futures[i] = tablesByIdVv.get(causalityToken).thenCompose(tablesById -> {
InternalTable internalTable = tablesById.get(tblId).internalTable();
try {
// A failed update is only logged; the resulting future then completes normally with null.
// NOTE(review): the log message below probably meant "on the node".
return raftMgr.updateRaftGroup(raftGroupName(tblId, partId), newPartitionAssignment, toAdd, () -> new PartitionListener(tblId, new VersionedRowStore(internalTable.storage().getOrCreatePartition(partId), txManager))).thenAccept(updatedRaftGroupService -> ((InternalTableImpl) internalTable).updateInternalTableRaftGroupService(partId, updatedRaftGroupService)).exceptionally(th -> {
LOG.error("Failed to update raft groups one the node", th);
return null;
});
} catch (NodeStoppingException e) {
throw new AssertionError("Loza was stopped before Table manager", e);
}
});
}
return CompletableFuture.allOf(futures);
}
@Override
public CompletableFuture<?> onRename(String oldName, String newName, ConfigurationNotificationEvent<TableView> ctx) {
// Nothing is done on rename; an already completed future is returned.
return CompletableFuture.completedFuture(null);
}
@Override
public CompletableFuture<?> onDelete(ConfigurationNotificationEvent<TableView> ctx) {
// The busy lock could not be entered: report the failed drop to the event listeners
// and return a future failed with NodeStoppingException.
if (!busyLock.enterBusy()) {
String tblName = ctx.oldValue().name();
UUID tblId = ((ExtendedTableView) ctx.oldValue()).id();
fireEvent(TableEvent.DROP, new TableEventParameters(ctx.storageRevision(), tblId, tblName), new NodeStoppingException());
return CompletableFuture.failedFuture(new NodeStoppingException());
}
try {
// The name, id and assignments of the dropped table are taken from the old configuration value.
dropTableLocally(ctx.storageRevision(), ctx.oldValue().name(), ((ExtendedTableView) ctx.oldValue()).id(), (List<List<ClusterNode>>) ByteUtils.fromBytes(((ExtendedTableView) ctx.oldValue()).assignments()));
} finally {
busyLock.leaveBusy();
}
return CompletableFuture.completedFuture(null);
}
});
engine.start();
// The default data region is put into the region map before it is started.
DataRegion defaultDataRegion = engine.createDataRegion(dataStorageCfg.defaultRegion());
dataRegions.put(DEFAULT_DATA_REGION_NAME, defaultDataRegion);
defaultDataRegion.start();
}
use of org.apache.ignite.internal.raft.Loza in project ignite-3 by apache.
the class ItTxDistributedTestSingleNode method before.
/**
 * Initialize the test state: starts the cluster nodes (and, if requested, a separate client node), a raft server
 * and a transaction manager per cluster node, the raft groups of the two test tables and finally the table facades
 * used by the tests.
 *
 * @throws Exception If any part of the setup fails.
 */
@Override
@BeforeEach
public void before() throws Exception {
    int nodes = nodes();
    int replicas = replicas();

    assertTrue(nodes > 0);
    assertTrue(replicas > 0);

    List<NetworkAddress> localAddresses = findLocalAddresses(NODE_PORT_BASE, NODE_PORT_BASE + nodes);

    var nodeFinder = new StaticNodeFinder(localAddresses);

    // Cluster nodes are started through a parallel stream and registered in the cluster list as they come up.
    nodeFinder.findNodes()
            .parallelStream()
            .map(addr -> startNode(testInfo, addr.toString(), addr.port(), nodeFinder))
            .forEach(cluster::add);

    for (ClusterService startedNode : cluster) {
        assertTrue(waitForTopology(startedNode, nodes, 1000));
    }

    log.info("The cluster has been started");

    if (startClient()) {
        client = startNode(testInfo, "client", NODE_PORT_BASE - 1, nodeFinder);

        // The client node is one more member of the topology.
        assertTrue(waitForTopology(client, nodes + 1, 1000));

        log.info("The client has been started");
    }

    // Start raft servers. Each raft server can hold multiple groups.
    raftServers = new HashMap<>(nodes);
    txManagers = new HashMap<>(nodes);

    executor = new ScheduledThreadPoolExecutor(20, new NamedThreadFactory(Loza.CLIENT_POOL_NAME));

    for (int i = 0; i < nodes; i++) {
        ClusterService clusterSvc = cluster.get(i);

        var raftSrv = new Loza(clusterSvc, workDir);
        raftSrv.start();

        ClusterNode localNode = clusterSvc.topologyService().localMember();
        raftServers.put(localNode, raftSrv);

        TableTxManagerImpl nodeTxMgr = new TableTxManagerImpl(clusterSvc, new HeapLockManager());
        nodeTxMgr.start();

        txManagers.put(localNode, nodeTxMgr);
    }

    log.info("Raft servers have been started");

    final String accountsName = "accounts";
    final String customersName = "customers";

    UUID accTblId = UUID.randomUUID();
    UUID custTblId = UUID.randomUUID();

    accRaftClients = startTable(accountsName, accTblId);
    custRaftClients = startTable(customersName, custTblId);

    log.info("Partition groups have been started");

    TxManager txMgr;

    if (!startClient()) {
        // Collocated mode.
        txMgr = txManagers.get(accRaftClients.get(0).clusterService().topologyService().localMember());
    } else {
        txMgr = new TxManagerImpl(client, new HeapLockManager());
    }

    assertNotNull(txMgr);

    igniteTransactions = new IgniteTransactionsImpl(txMgr);

    InternalTableImpl accountsInternal = new InternalTableImpl(accountsName, accTblId, accRaftClients, 1, NetworkAddress::toString, txMgr, Mockito.mock(TableStorage.class));
    this.accounts = new TableImpl(accountsInternal, new DummySchemaManagerImpl(ACCOUNTS_SCHEMA));

    InternalTableImpl customersInternal = new InternalTableImpl(customersName, custTblId, custRaftClients, 1, NetworkAddress::toString, txMgr, Mockito.mock(TableStorage.class));
    this.customers = new TableImpl(customersInternal, new DummySchemaManagerImpl(CUSTOMERS_SCHEMA));

    log.info("Tables have been started");
}
use of org.apache.ignite.internal.raft.Loza in project ignite-3 by apache.
the class ItTxDistributedTestSingleNode method startTable.
/**
 * Starts a table: prepares a raft group for every partition on each node of the partition assignment and creates
 * a raft client per partition.
 *
 * @param name The name.
 * @param tblId Table id.
 * @return Groups map: partition index to the raft client of that partition.
 * @throws Exception If a raft group or a raft client cannot be started.
 */
protected Int2ObjectOpenHashMap<RaftGroupService> startTable(String name, UUID tblId) throws Exception {
    List<ClusterNode> clusterNodes = cluster.stream()
            .map(svc -> svc.topologyService().localMember())
            .collect(Collectors.toList());

    List<List<ClusterNode>> assignment = RendezvousAffinityFunction.assignPartitions(clusterNodes, 1, replicas(), false, null);

    Int2ObjectOpenHashMap<RaftGroupService> clients = new Int2ObjectOpenHashMap<>();

    for (int p = 0; p < assignment.size(); p++) {
        List<ClusterNode> partNodes = assignment.get(p);

        String grpId = name + "-part-" + p;

        List<Peer> conf = partNodes.stream()
                .map(partNode -> new Peer(partNode.address()))
                .collect(Collectors.toList());

        // Every node of the partition assignment hosts the raft group with its own transaction manager.
        for (ClusterNode node : partNodes) {
            raftServers.get(node).prepareRaftGroup(
                    grpId,
                    partNodes,
                    () -> new PartitionListener(tblId, new VersionedRowStore(new ConcurrentHashMapPartitionStorage(), txManagers.get(node)))
            );
        }

        RaftGroupService partitionClient;

        if (startClient()) {
            partitionClient = RaftGroupServiceImpl.start(grpId, client, FACTORY, 10_000, conf, true, 200, executor)
                    .get(5, TimeUnit.SECONDS);
        } else {
            // Create temporary client to find a leader address.
            ClusterService tmpSvc = raftServers.values().stream().findFirst().get().service();

            RaftGroupService leaderLookup = RaftGroupServiceImpl.start(grpId, tmpSvc, FACTORY, 10_000, conf, true, 200, executor)
                    .get(5, TimeUnit.SECONDS);

            Peer leader = leaderLookup.leader();

            leaderLookup.shutdown();

            // The actual client is started on the cluster service of the node that hosts the leader.
            Loza leaderSrv = raftServers.get(tmpSvc.topologyService().getByAddress(leader.address()));

            partitionClient = RaftGroupServiceImpl.start(grpId, leaderSrv.service(), FACTORY, 10_000, conf, true, 200, executor)
                    .get(5, TimeUnit.SECONDS);
        }

        clients.put(p, partitionClient);
    }

    return clients;
}
Aggregations