use of org.apache.bookkeeper.client.api.LedgerMetadata in project herddb by diennea.
the class BookieNotAvailableTest method testBookieNotAvailableDuringTransaction.
@Test
public void testBookieNotAvailableDuringTransaction() throws Exception {
ServerConfiguration serverconfig_1 = newServerConfigurationWithAutoPort(folder.newFolder().toPath());
serverconfig_1.set(ServerConfiguration.PROPERTY_NODEID, "server1");
serverconfig_1.set(ServerConfiguration.PROPERTY_MODE, ServerConfiguration.PROPERTY_MODE_CLUSTER);
serverconfig_1.set(ServerConfiguration.PROPERTY_ZOOKEEPER_ADDRESS, testEnv.getAddress());
serverconfig_1.set(ServerConfiguration.PROPERTY_ZOOKEEPER_PATH, testEnv.getPath());
serverconfig_1.set(ServerConfiguration.PROPERTY_ZOOKEEPER_SESSIONTIMEOUT, testEnv.getTimeout());
serverconfig_1.set(ServerConfiguration.PROPERTY_ENFORCE_LEADERSHIP, false);
try (Server server = new Server(serverconfig_1)) {
server.start();
server.waitForStandaloneBoot();
Table table = Table.builder().name("t1").column("c", ColumnTypes.INTEGER).primaryKey("c").build();
// the create table is executed outside of the transaction (this is much like autocommit=true)
server.getManager().executeStatement(new CreateTableStatement(table), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), TransactionContext.NO_TRANSACTION);
StatementExecutionResult executeStatement = server.getManager().executeUpdate(new InsertStatement(TableSpace.DEFAULT, "t1", RecordSerializer.makeRecord(table, "c", 1)), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), TransactionContext.AUTOTRANSACTION_TRANSACTION);
long transactionId = executeStatement.transactionId;
server.getManager().executeUpdate(new InsertStatement(TableSpace.DEFAULT, "t1", RecordSerializer.makeRecord(table, "c", 2)), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), new TransactionContext(transactionId));
server.getManager().executeUpdate(new InsertStatement(TableSpace.DEFAULT, "t1", RecordSerializer.makeRecord(table, "c", 3)), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), new TransactionContext(transactionId));
TableSpaceManager tableSpaceManager = server.getManager().getTableSpaceManager(TableSpace.DEFAULT);
BookkeeperCommitLog log = (BookkeeperCommitLog) tableSpaceManager.getLog();
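// assumption: in PROPERTY_MODE_CLUSTER the tablespace log is a BookkeeperCommitLog, so we can track the ledger the transaction is writing to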
long ledgerId = log.getLastSequenceNumber().ledgerId;
assertTrue(ledgerId >= 0);
Transaction transaction = tableSpaceManager.getTransactions().stream().filter(t -> t.transactionId == transactionId).findFirst().get();
// the transaction syncs, so every addEntry will be acked, but not yet "confirmed"
transaction.sync();
try (DataScanner scan = scan(server.getManager(), "select * from t1", Collections.emptyList(), new TransactionContext(transactionId))) {
assertEquals(3, scan.consume().size());
}
try (DataScanner scan = scan(server.getManager(), "select * from t1", Collections.emptyList(), TransactionContext.NO_TRANSACTION)) {
// no record, but the table exists!
assertEquals(0, scan.consume().size());
}
// we do not want auto-recovery
server.getManager().setActivatorPauseStatus(true);
BookieId bookieAddr = testEnv.stopBookie();
// the transaction will continue and will only see the failure at commit time
try {
server.getManager().executeUpdate(new InsertStatement(TableSpace.DEFAULT, "t1", RecordSerializer.makeRecord(table, "c", 4)), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), new TransactionContext(transactionId));
// this will piggyback the LAC for the transaction
System.out.println("Insert of c,4 OK");
} catch (StatementExecutionException expected) {
System.out.println("Insert of c,4 failed " + expected);
// it can happen that the log gets closed
assertEquals(herddb.log.LogNotAvailableException.class, expected.getCause().getClass());
}
try {
server.getManager().executeUpdate(new InsertStatement(TableSpace.DEFAULT, "t1", RecordSerializer.makeRecord(table, "c", 5)), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), new TransactionContext(transactionId));
// this will piggyback the LAC for the transaction
System.out.println("Insert of c,5 OK");
} catch (StatementExecutionException expected) {
System.out.println("Insert of c,5 failed " + expected);
// it can happen that the log gets closed
assertEquals(herddb.log.LogNotAvailableException.class, expected.getCause().getClass());
}
try {
server.getManager().executeUpdate(new InsertStatement(TableSpace.DEFAULT, "t1", RecordSerializer.makeRecord(table, "c", 6)), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), new TransactionContext(transactionId));
// this will piggyback the LAC for the transaction
System.out.println("Insert of c,6 OK");
} catch (StatementExecutionException expected) {
System.out.println("Insert of c,6 failed " + expected);
// it can happen that the log gets closed
assertEquals(herddb.log.LogNotAvailableException.class, expected.getCause().getClass());
}
try {
server.getManager().executeStatement(new CommitTransactionStatement(TableSpace.DEFAULT, transactionId), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), TransactionContext.NO_TRANSACTION);
// this will always fail
fail();
} catch (StatementExecutionException expected) {
System.out.println("Commit failed as expected:" + expected);
}
testEnv.startStoppedBookie(bookieAddr);
while (true) {
System.out.println("status leader:" + tableSpaceManager.isLeader() + " failed:" + tableSpaceManager.isFailed());
if (tableSpaceManager.isFailed()) {
break;
}
Thread.sleep(100);
}
try (BookKeeper bk = createBookKeeper();
LedgerHandle handle = bk.openLedgerNoRecovery(ledgerId, BookKeeper.DigestType.CRC32C, "herddb".getBytes(StandardCharsets.UTF_8))) {
BookKeeperAdmin admin = new BookKeeperAdmin(bk);
try {
LedgerMetadata ledgerMetadata = admin.getLedgerMetadata(handle);
System.out.println("current ledger metadata before recovery: " + ledgerMetadata);
} finally {
admin.close();
}
}
server.getManager().setActivatorPauseStatus(false);
server.getManager().triggerActivator(ActivatorRunRequest.TABLESPACEMANAGEMENT);
while (true) {
TableSpaceManager tableSpaceManager_after_failure = server.getManager().getTableSpaceManager(TableSpace.DEFAULT);
System.out.println("tableSpaceManager_after_failure:" + tableSpaceManager_after_failure);
System.out.println("tableSpaceManager:" + tableSpaceManager);
if (tableSpaceManager_after_failure != null && tableSpaceManager_after_failure != tableSpaceManager) {
break;
}
Thread.sleep(1000);
server.getManager().triggerActivator(ActivatorRunRequest.TABLESPACEMANAGEMENT);
}
TableSpaceManager tableSpaceManager_after_failure = server.getManager().getTableSpaceManager(TableSpace.DEFAULT);
Assert.assertNotNull(tableSpaceManager_after_failure);
assertNotSame(tableSpaceManager_after_failure, tableSpaceManager);
assertTrue(!tableSpaceManager_after_failure.isFailed());
// the insert should succeed because the transaction has been rolled back automatically
server.getManager().executeUpdate(new InsertStatement(TableSpace.DEFAULT, "t1", RecordSerializer.makeRecord(table, "c", 4)), StatementEvaluationContext.DEFAULT_EVALUATION_CONTEXT(), TransactionContext.NO_TRANSACTION);
try (DataScanner scan = scan(server.getManager(), "select * from t1", Collections.emptyList())) {
assertEquals(1, scan.consume().size());
}
}
}
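The same ledger metadata can also be read without a BookKeeperAdmin, going through the LedgerManager as the other examples on this page do. A minimal sketch, assuming a client obtained like the test's createBookKeeper() and the ledgerId captured above:
try (BookKeeper bk = createBookKeeper()) {
    // readLedgerMetadata returns a CompletableFuture<Versioned<LedgerMetadata>>
    LedgerMetadata md = bk.getLedgerManager().readLedgerMetadata(ledgerId).get().getValue();
    System.out.println("state=" + md.getState() + " ensembles=" + md.getAllEnsembles());
}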
use of org.apache.bookkeeper.client.api.LedgerMetadata in project herddb by diennea.
the class ExpectedReplicaCountTest method testDisklessClusterReplication.
@Test
public void testDisklessClusterReplication() throws Exception {
TestStatsProvider statsProvider = new TestStatsProvider();
ServerConfiguration serverconfig_1 = newServerConfigurationWithAutoPort(folder.newFolder().toPath());
serverconfig_1.set(ServerConfiguration.PROPERTY_NODEID, "server1");
serverconfig_1.set(ServerConfiguration.PROPERTY_MODE, ServerConfiguration.PROPERTY_MODE_DISKLESSCLUSTER);
serverconfig_1.set(ServerConfiguration.PROPERTY_ZOOKEEPER_ADDRESS, testEnv.getAddress());
serverconfig_1.set(ServerConfiguration.PROPERTY_ZOOKEEPER_PATH, testEnv.getPath());
serverconfig_1.set(ServerConfiguration.PROPERTY_ZOOKEEPER_SESSIONTIMEOUT, testEnv.getTimeout());
try (Server server_1 = new Server(serverconfig_1)) {
server_1.start();
server_1.waitForStandaloneBoot();
TestUtils.execute(server_1.getManager(), "CREATE TABLESPACE 'ttt','leader:" + server_1.getNodeId() + "','expectedreplicacount:2'", Collections.emptyList());
// perform some writes
ClientConfiguration clientConfiguration = new ClientConfiguration();
clientConfiguration.set(ClientConfiguration.PROPERTY_MODE, ClientConfiguration.PROPERTY_MODE_CLUSTER);
clientConfiguration.set(ClientConfiguration.PROPERTY_ZOOKEEPER_ADDRESS, testEnv.getAddress());
clientConfiguration.set(ClientConfiguration.PROPERTY_ZOOKEEPER_PATH, testEnv.getPath());
clientConfiguration.set(ClientConfiguration.PROPERTY_ZOOKEEPER_SESSIONTIMEOUT, testEnv.getTimeout());
StatsLogger logger = statsProvider.getStatsLogger("ds");
try (HDBClient client1 = new HDBClient(clientConfiguration, logger)) {
try (HDBConnection connection = client1.openConnection()) {
// create table and insert data
connection.executeUpdate(TableSpace.DEFAULT, "CREATE TABLE ttt.t1(k1 int primary key, n1 int)", TransactionContext.NOTRANSACTION_ID, false, false, Collections.emptyList());
connection.executeUpdate(TableSpace.DEFAULT, "INSERT INTO ttt.t1(k1,n1) values(1,1)", TransactionContext.NOTRANSACTION_ID, false, false, Collections.emptyList());
connection.executeUpdate(TableSpace.DEFAULT, "INSERT INTO ttt.t1(k1,n1) values(2,1)", TransactionContext.NOTRANSACTION_ID, false, false, Collections.emptyList());
connection.executeUpdate(TableSpace.DEFAULT, "INSERT INTO ttt.t1(k1,n1) values(3,1)", TransactionContext.NOTRANSACTION_ID, false, false, Collections.emptyList());
// flush data pages to BK
server_1.getManager().checkpoint();
Set<Long> initialLedgers = new HashSet<>();
// verify that every ledger has ensemble size 2
try (BookKeeper bk = createBookKeeper()) {
BookKeeperAdmin admin = new BookKeeperAdmin(bk);
for (long lId : admin.listLedgers()) {
LedgerMetadata md = bk.getLedgerManager().readLedgerMetadata(lId).get().getValue();
if ("ttt".equals(new String(md.getCustomMetadata().get("tablespaceuuid"), StandardCharsets.UTF_8))) {
assertEquals(2, md.getEnsembleSize());
assertEquals(2, md.getWriteQuorumSize());
assertEquals(2, md.getAckQuorumSize());
initialLedgers.add(lId);
}
}
}
BookkeeperCommitLog log = (BookkeeperCommitLog) server_1.getManager().getTableSpaceManager("ttt").getLog();
final long currentLedgerId = log.getWriter().getLedgerId();
// downsize to expectedreplicacount = 1
TestUtils.execute(server_1.getManager(), "ALTER TABLESPACE 'ttt','leader:" + server_1.getNodeId() + "','expectedreplicacount:1'", Collections.emptyList());
// the TableSpaceManager will roll a new ledger
herddb.utils.TestUtils.waitForCondition(() -> {
if (log.getWriter() == null) {
return false;
}
long newLedgerId = log.getWriter().getLedgerId();
return newLedgerId != currentLedgerId;
}, herddb.utils.TestUtils.NOOP, 100);
// write another record
connection.executeUpdate(TableSpace.DEFAULT, "INSERT INTO ttt.t1(k1,n1) values(4,1)", TransactionContext.NOTRANSACTION_ID, false, false, Collections.emptyList());
// flush data pages
server_1.getManager().checkpoint();
// verify that every ledger has ensemble size 2 or 1
try (BookKeeper bk = createBookKeeper()) {
BookKeeperAdmin admin = new BookKeeperAdmin(bk);
for (long lId : admin.listLedgers()) {
LedgerMetadata md = bk.getLedgerManager().readLedgerMetadata(lId).get().getValue();
if ("ttt".equals(new String(md.getCustomMetadata().get("tablespaceuuid"), StandardCharsets.UTF_8))) {
if (initialLedgers.contains(lId)) {
assertEquals(2, md.getEnsembleSize());
assertEquals(2, md.getWriteQuorumSize());
assertEquals(2, md.getAckQuorumSize());
} else {
assertEquals(1, md.getEnsembleSize());
assertEquals(1, md.getWriteQuorumSize());
assertEquals(1, md.getAckQuorumSize());
}
}
}
}
}
}
}
}
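The ensemble, write quorum and ack quorum sizes asserted above are the values requested when each ledger is created. A minimal sketch of the creation side, assuming the classic BookKeeper client API; the "tablespaceuuid" key mirrors the custom metadata checked in the test, while the password is an arbitrary placeholder:
Map<String, byte[]> customMetadata = new HashMap<>();
customMetadata.put("tablespaceuuid", "ttt".getBytes(StandardCharsets.UTF_8));
try (BookKeeper bk = createBookKeeper()) {
    // ensembleSize = writeQuorumSize = ackQuorumSize = 2, matching 'expectedreplicacount:2'
    LedgerHandle lh = bk.createLedger(2, 2, 2, BookKeeper.DigestType.CRC32C, "secret".getBytes(StandardCharsets.UTF_8), customMetadata);
    lh.close();
}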
use of org.apache.bookkeeper.client.api.LedgerMetadata in project herddb by diennea.
the class BookkeeperCommitLog method recovery.
@Override
public void recovery(LogSequenceNumber snapshotSequenceNumber, BiConsumer<LogSequenceNumber, LogEntry> consumer, boolean fencing) throws LogNotAvailableException {
String tableSpaceDescription = tableSpaceDescription();
this.actualLedgersList = metadataManager.getActualLedgersList(tableSpaceUUID);
LOGGER.log(Level.INFO, "Actual ledgers list:{0} tableSpace {1}", new Object[] { actualLedgersList, tableSpaceDescription });
this.lastLedgerId = snapshotSequenceNumber.ledgerId;
this.currentLedgerId = snapshotSequenceNumber.ledgerId;
this.lastSequenceNumber.set(snapshotSequenceNumber.offset);
LOGGER.log(Level.INFO, "recovery from latest snapshotSequenceNumber:{0} tableSpace {1}, node {2}, fencing {3}", new Object[] { snapshotSequenceNumber, tableSpaceDescription, localNodeId, fencing });
if (!isRecoveryAvailable(snapshotSequenceNumber, actualLedgersList, tableSpaceDescription)) {
throw new FullRecoveryNeededException("Cannot recover from BookKeeper, not enough data, plese check the logs");
}
for (long ledgerId : actualLedgersList.getActiveLedgers()) {
try {
Versioned<LedgerMetadata> result = FutureUtils.result(bookKeeper.getLedgerManager().readLedgerMetadata(ledgerId));
LedgerMetadata metadata = result.getValue();
String ledgerLeader = extractLeaderFromMetadata(metadata.getCustomMetadata());
LOGGER.log(Level.INFO, "Ledger {0}: {1} {2} created by {3}, LastEntryId {4} Length {5}", new Object[] { String.valueOf(ledgerId), metadata.getState(), metadata.getAllEnsembles(), ledgerLeader, metadata.getLastEntryId(), metadata.getLength() });
} catch (BKException.BKNoSuchLedgerExistsException | BKException.BKNoSuchLedgerExistsOnMetadataServerException e) {
if (ledgerId < snapshotSequenceNumber.ledgerId) {
LOGGER.log(Level.INFO, "Actual ledgers list includes a not existing ledgerid:" + ledgerId + " tablespace " + tableSpaceDescription + ", but this ledger is not useful for recovery (snapshotSequenceNumber.ledgerId is " + snapshotSequenceNumber.ledgerId);
} else {
throw new FullRecoveryNeededException(new Exception("Actual ledgers list includes a non-existing ledgerId:" + ledgerId + " tablespace " + tableSpaceDescription));
}
} catch (LogNotAvailableException e) {
throw e;
} catch (Exception e) {
throw new LogNotAvailableException(e);
}
}
try {
for (long ledgerId : actualLedgersList.getActiveLedgers()) {
if (ledgerId < snapshotSequenceNumber.ledgerId) {
LOGGER.log(Level.FINER, "Skipping ledger {0}", ledgerId);
continue;
}
ReadHandle handle;
try {
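// opening with recovery fences the ledger, so the previous writer cannot append anymore; openLedgerNoRecovery leaves it writable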
if (fencing) {
handle = bookKeeper.openLedger(ledgerId, BookKeeper.DigestType.CRC32C, SHARED_SECRET.getBytes(StandardCharsets.UTF_8));
} else {
handle = bookKeeper.openLedgerNoRecovery(ledgerId, BookKeeper.DigestType.CRC32C, SHARED_SECRET.getBytes(StandardCharsets.UTF_8));
}
} catch (org.apache.bookkeeper.client.api.BKException errorDuringOpen) {
throw new LogNotAvailableException("Cannot open ledger " + ledgerId + " (fencing " + fencing + "): " + errorDuringOpen, errorDuringOpen);
}
try {
long first;
if (ledgerId == snapshotSequenceNumber.ledgerId) {
first = snapshotSequenceNumber.offset;
if (first == -1) {
// this can happen if a checkpoint happened while starting to follow a new ledger, but no entry was ever read
LOGGER.log(Level.INFO, "Tablespace " + tableSpaceDescription + ", recovering from latest snapshot ledger " + ledgerId + ", first entry " + first + " is not valid. Adjusting to 0");
first = 0;
}
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "Tablespace " + tableSpaceDescription + ", recovering from latest snapshot ledger " + ledgerId + ", starting from entry " + first);
}
} else {
first = 0;
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "Tablespace " + tableSpaceDescription + ", recovering from ledger " + ledgerId + ", starting from entry " + first);
}
}
long lastAddConfirmed = handle.getLastAddConfirmed();
String ledgerLeader = extractLeaderFromMetadata(handle.getLedgerMetadata().getCustomMetadata());
LOGGER.log(Level.INFO, "Tablespace " + tableSpaceDescription + ", Recovering from ledger " + ledgerId + ", first=" + first + " lastAddConfirmed=" + lastAddConfirmed + " written by " + ledgerLeader);
if (lastAddConfirmed >= 0) {
for (long b = first; b <= lastAddConfirmed; ) {
long start = b;
long end = b + RECOVERY_BATCH_SIZE;
if (end > lastAddConfirmed) {
end = lastAddConfirmed;
}
b = end + 1;
double percent = ((start - first) * 100.0 / (lastAddConfirmed + 1));
int entriesToRead = (int) (1 + end - start);
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "{3} From entry {0}, to entry {1} ({2} %)", new Object[] { start, end, percent, tableSpaceDescription });
}
long _start = System.currentTimeMillis();
int localEntryCount = 0;
try (LedgerEntries entries = handle.read(start, end)) {
for (org.apache.bookkeeper.client.api.LedgerEntry entry : entries) {
long entryId = entry.getEntryId();
LogSequenceNumber number = new LogSequenceNumber(ledgerId, entryId);
LogEntry statusEdit = readLogEntry(entry);
lastLedgerId = ledgerId;
currentLedgerId = ledgerId;
lastSequenceNumber.set(entryId);
if (number.after(snapshotSequenceNumber)) {
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "rec " + tableSpaceName + " #" + localEntryCount + " {0}, {1}", new Object[] { number, statusEdit });
}
consumer.accept(number, statusEdit);
} else {
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "skip " + tableSpaceName + " #" + localEntryCount + " {0}<{1}, {2}", new Object[] { number, snapshotSequenceNumber, statusEdit });
}
}
localEntryCount++;
}
}
LOGGER.log(Level.FINER, tableSpaceDescription() + " read " + localEntryCount + " entries from ledger " + ledgerId + ", expected " + entriesToRead);
if (localEntryCount != entriesToRead) {
throw new LogNotAvailableException(tableSpaceDescription() + " Read " + localEntryCount + " entries, expected " + entriesToRead);
}
lastLedgerId = ledgerId;
lastSequenceNumber.set(end);
long _stop = System.currentTimeMillis();
LOGGER.log(Level.INFO, "{4} From entry {0}, to entry {1} ({2} %) read time {3}", new Object[] { start, end, percent, (_stop - _start) + " ms", tableSpaceDescription });
}
}
} catch (RuntimeException err) {
LOGGER.log(Level.SEVERE, "Internal error while recovering tablespace " + tableSpaceDescription() + ": " + err, err);
throw err;
} finally {
handle.close();
}
}
LOGGER.log(Level.INFO, "After recovery of {0} lastSequenceNumber {1}", new Object[] { tableSpaceDescription, getLastSequenceNumber() });
} catch (IOException | InterruptedException | org.apache.bookkeeper.client.api.BKException err) {
LOGGER.log(Level.SEVERE, "Fatal error during recovery of " + tableSpaceDescription(), err);
signalLogFailed();
throw new LogNotAvailableException(err);
} catch (LogNotAvailableException err) {
LOGGER.log(Level.SEVERE, "Fatal error during recovery of " + tableSpaceDescription(), err);
signalLogFailed();
throw err;
}
}
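The inner loop above follows the standard batched-read pattern over a ReadHandle. A minimal sketch in isolation, assuming an already opened handle; process(...) is a hypothetical placeholder for readLogEntry/consumer.accept:
long lastAddConfirmed = handle.getLastAddConfirmed();
for (long start = 0; start <= lastAddConfirmed; start += RECOVERY_BATCH_SIZE) {
    long end = Math.min(start + RECOVERY_BATCH_SIZE - 1, lastAddConfirmed);
    // LedgerEntries is AutoCloseable and releases the underlying entry buffers
    try (LedgerEntries entries = handle.read(start, end)) {
        for (org.apache.bookkeeper.client.api.LedgerEntry entry : entries) {
            process(entry.getEntryId(), entry.getEntryBytes());
        }
    }
}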