Search in sources :

Example 1 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason in project accumulo by apache.

the class SplitRecoveryIT method run.

/**
 * Takes a service lock on a {@code /testLock} node under the ZooKeeper root and then runs the
 * split recovery test against tables made of one, two and three tablets.
 *
 * <p>
 * If the lock cannot be obtained, a message is printed to standard error and the tests are still
 * executed. Losing the lock, or losing the ability to monitor it, terminates the JVM with exit
 * code -1.
 *
 * @param c
 *          server context supplying the ZooKeeper root path and the ZooKeeper reader/writer
 * @throws Exception
 *           propagated from the ZooKeeper calls or from the individual test runs
 */
private void run(ServerContext c) throws Exception {
    var lockPath = ServiceLock.path(c.getZooKeeperRoot() + "/testLock");
    ZooReaderWriter zrw = c.getZooReaderWriter();
    // make sure the lock's parent node exists, replacing whatever data was there before
    zrw.putPersistentData(lockPath.toString(), new byte[] {}, NodeExistsPolicy.OVERWRITE);
    ServiceLock testLock = new ServiceLock(zrw.getZooKeeper(), lockPath, UUID.randomUUID());

    // any problem with the lock kills the test process outright
    LockWatcher exitOnTrouble = new LockWatcher() {

        @SuppressFBWarnings(value = "DM_EXIT", justification = "System.exit() is a bad idea here, but okay for now, since it's a test")
        @Override
        public void lostLock(LockLossReason reason) {
            System.exit(-1);
        }

        @SuppressFBWarnings(value = "DM_EXIT", justification = "System.exit() is a bad idea here, but okay for now, since it's a test")
        @Override
        public void unableToMonitorLockNode(Exception e) {
            System.exit(-1);
        }
    };
    byte[] lockData = "foo".getBytes(UTF_8);
    boolean acquired = testLock.tryLock(exitOnTrouble, lockData);
    if (!acquired) {
        System.err.println("Failed to get lock " + lockPath);
    }

    // single-tablet tables
    runSplitRecoveryTest(c, 0, "sp", 0, testLock,
        nke("foo0", null, null));
    runSplitRecoveryTest(c, 1, "sp", 0, testLock,
        nke("foo1", null, null));

    // two-tablet tables, exercising the first tablet and then the last tablet
    runSplitRecoveryTest(c, 0, "k", 0, testLock,
        nke("foo2", "m", null), nke("foo2", null, "m"));
    runSplitRecoveryTest(c, 1, "k", 0, testLock,
        nke("foo3", "m", null), nke("foo3", null, "m"));
    runSplitRecoveryTest(c, 0, "o", 1, testLock,
        nke("foo4", "m", null), nke("foo4", null, "m"));
    runSplitRecoveryTest(c, 1, "o", 1, testLock,
        nke("foo5", "m", null), nke("foo5", null, "m"));

    // three-tablet tables, exercising the middle tablet
    runSplitRecoveryTest(c, 0, "o", 1, testLock,
        nke("foo6", "m", null), nke("foo6", "r", "m"), nke("foo6", null, "r"));
    runSplitRecoveryTest(c, 1, "o", 1, testLock,
        nke("foo7", "m", null), nke("foo7", "r", "m"), nke("foo7", null, "r"));

    // three-tablet tables, exercising the first tablet
    runSplitRecoveryTest(c, 0, "g", 0, testLock,
        nke("foo8", "m", null), nke("foo8", "r", "m"), nke("foo8", null, "r"));
    runSplitRecoveryTest(c, 1, "g", 0, testLock,
        nke("foo9", "m", null), nke("foo9", "r", "m"), nke("foo9", null, "r"));

    // three-tablet tables, exercising the last tablet
    runSplitRecoveryTest(c, 0, "w", 2, testLock,
        nke("fooa", "m", null), nke("fooa", "r", "m"), nke("fooa", null, "r"));
    runSplitRecoveryTest(c, 1, "w", 2, testLock,
        nke("foob", "m", null), nke("foob", "r", "m"), nke("foob", null, "r"));
}
Also used : ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) LockWatcher(org.apache.accumulo.fate.zookeeper.ServiceLock.LockWatcher) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason)

Example 2 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason in project accumulo by apache.

the class ZombieTServer method main.

/**
 * Starts a Thrift server on a randomly chosen port, registers it in ZooKeeper under the tablet
 * servers node by taking a service lock, and then blocks until the client handler reports that it
 * has been halted, at which point the JVM exits with status 0.
 *
 * <p>
 * When the lock is lost or can no longer be monitored, the handler is asked to halt; if that call
 * itself fails, the error is logged and the JVM exits with status 1.
 *
 * @param args
 *          command line arguments (not read by this method)
 * @throws Exception
 *           propagated from server start-up, ZooKeeper access, or the wait on the handler
 */
public static void main(String[] args) throws Exception {
    // pick a port in the range [2000, 32000)
    int listenPort = random.nextInt(30000) + 2000;
    var ctx = new ServerContext(SiteConfiguration.auto());
    TransactionWatcher txWatcher = new TransactionWatcher(ctx);
    final ThriftClientHandler tch = new ThriftClientHandler(ctx, txWatcher);
    Processor<Iface> thriftProcessor = new Processor<>(tch);
    ServerAddress server = TServerUtils.startTServer(
        ctx.getConfiguration(),
        ThriftServerType.CUSTOM_HS_HA,
        thriftProcessor,
        "ZombieTServer",
        "walking dead",
        2,
        ThreadPools.DEFAULT_TIMEOUT_MILLISECS,
        1000,
        10 * 1024 * 1024,
        null,
        null,
        -1,
        HostAndPort.fromParts("0.0.0.0", listenPort));
    String address = server.address.toString();

    var lockPath = ServiceLock.path(ctx.getZooKeeperRoot() + Constants.ZTSERVERS + "/" + address);
    ZooReaderWriter zrw = ctx.getZooReaderWriter();
    zrw.putPersistentData(lockPath.toString(), new byte[0], NodeExistsPolicy.SKIP);
    ServiceLock serverLock = new ServiceLock(zrw.getZooKeeper(), lockPath, UUID.randomUUID());

    LockWatcher haltingWatcher = new LockWatcher() {

        @Override
        public void lostLock(final LockLossReason reason) {
            haltHandler();
        }

        @Override
        public void unableToMonitorLockNode(Exception e) {
            haltHandler();
        }

        // Both callbacks react the same way: halt the handler, or bail out if that fails.
        @SuppressFBWarnings(value = "DM_EXIT", justification = "System.exit() is a bad idea here, but okay for now, since it's a test")
        private void haltHandler() {
            try {
                tch.halt(TraceUtil.traceInfo(), null, null);
            } catch (Exception ex) {
                log.error("Exception", ex);
                System.exit(1);
            }
        }
    };

    byte[] lockData = new ServerServices(address, Service.TSERV_CLIENT).toString().getBytes(UTF_8);
    boolean locked = serverLock.tryLock(haltingWatcher, lockData);
    if (locked) {
        log.debug("Obtained tablet server lock {}", serverLock.getLockPath());
    }

    // park the main thread on the handler's monitor until it flags itself as halted
    synchronized (tch) {
        while (!tch.halted) {
            tch.wait();
        }
    }
    System.exit(0);
}
Also used : Processor(org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Processor) ServerServices(org.apache.accumulo.core.util.ServerServices) ServerAddress(org.apache.accumulo.server.rpc.ServerAddress) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) Iface(org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Iface) TransactionWatcher(org.apache.accumulo.server.zookeeper.TransactionWatcher) ServerContext(org.apache.accumulo.server.ServerContext) ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) LockWatcher(org.apache.accumulo.fate.zookeeper.ServiceLock.LockWatcher) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason)

Example 3 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason in project accumulo by apache.

the class Compactor method announceExistence.

/**
 * Set up nodes and locks in ZooKeeper for this Compactor.
 *
 * <p>
 * The queue node and this Compactor's host:port node are created first (existing nodes are left
 * untouched). The method then tries up to 25 times, pausing 5 seconds between attempts, to obtain
 * the Compactor's service lock and returns as soon as the lock is held. If the lock is later lost
 * or can no longer be monitored, the process is halted with exit code 1.
 *
 * @param clientAddress
 *          address of this Compactor
 * @throws KeeperException
 *           zookeeper error while creating the initial nodes
 * @throws InterruptedException
 *           thread interrupted
 * @throws RuntimeException
 *           if the lock could not be obtained after all retries, or if any exception occurred
 *           while trying to obtain it (the original exception is attached as the cause)
 */
protected void announceExistence(HostAndPort clientAddress) throws KeeperException, InterruptedException {
    String hostPort = ExternalCompactionUtil.getHostPortString(clientAddress);
    ZooReaderWriter zoo = getContext().getZooReaderWriter();
    String compactorQueuePath = getContext().getZooKeeperRoot() + Constants.ZCOMPACTORS + "/" + this.queueName;
    String zPath = compactorQueuePath + "/" + hostPort;
    try {
        zoo.mkdirs(compactorQueuePath);
        zoo.putPersistentData(zPath, new byte[] {}, NodeExistsPolicy.SKIP);
    } catch (KeeperException e) {
        // NOAUTH gets an extra hint for the operator; every KeeperException is still rethrown
        if (e.code() == KeeperException.Code.NOAUTH) {
            LOG.error("Failed to write to ZooKeeper. Ensure that" + " accumulo.properties, specifically instance.secret, is consistent.");
        }
        throw e;
    }
    compactorLock = new ServiceLock(getContext().getZooReaderWriter().getZooKeeper(), ServiceLock.path(zPath), compactorId);
    LockWatcher lw = new LockWatcher() {

        @Override
        public void lostLock(final LockLossReason reason) {
            Halt.halt(1, () -> {
                LOG.error("Compactor lost lock (reason = {}), exiting.", reason);
                gcLogger.logGCInfo(getConfiguration());
            });
        }

        @Override
        public void unableToMonitorLockNode(final Exception e) {
            Halt.halt(1, () -> LOG.error("Lost ability to monitor Compactor lock, exiting.", e));
        }
    };
    try {
        byte[] lockContent = new ServerServices(hostPort, Service.COMPACTOR_CLIENT).toString().getBytes(UTF_8);
        for (int i = 0; i < 25; i++) {
            // re-create the lock's parent node on every attempt in case it has disappeared
            zoo.putPersistentData(zPath, new byte[0], NodeExistsPolicy.SKIP);
            if (compactorLock.tryLock(lw, lockContent)) {
                LOG.debug("Obtained Compactor lock {}", compactorLock.getLockPath());
                return;
            }
            LOG.info("Waiting for Compactor lock");
            sleepUninterruptibly(5, TimeUnit.SECONDS);
        }
        // NOTE: this exception is caught by the handler below and wrapped once more
        String msg = "Too many retries, exiting.";
        LOG.info(msg);
        throw new RuntimeException(msg);
    } catch (Exception e) {
        // this is the Compactor, so the message must name the Compactor lock (it previously
        // said "tablet server lock", which misdirected anyone reading the logs)
        LOG.info("Could not obtain Compactor lock, exiting.", e);
        throw new RuntimeException(e);
    }
}
Also used : ServerServices(org.apache.accumulo.core.util.ServerServices) ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) LockWatcher(org.apache.accumulo.fate.zookeeper.ServiceLock.LockWatcher) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason) KeeperException(org.apache.zookeeper.KeeperException) TTransportException(org.apache.thrift.transport.TTransportException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ThriftSecurityException(org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException) RetriesExceededException(org.apache.accumulo.server.compaction.RetryableThriftCall.RetriesExceededException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) TException(org.apache.thrift.TException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ThriftTableOperationException(org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException) UnknownCompactionIdException(org.apache.accumulo.core.compaction.thrift.UnknownCompactionIdException)

Example 4 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason in project accumulo by apache.

the class SimpleGarbageCollector method getZooLock.

/**
 * Blocks until the garbage collector's service lock in ZooKeeper has been obtained.
 *
 * <p>
 * A new {@link ServiceLock} is created for every attempt (all attempts share one UUID) and the
 * method retries once per second, without limit, until an attempt succeeds. If the lock is later
 * lost, or can no longer be monitored, the process is halted.
 *
 * @param addr
 *          address of this garbage collector; its string form is advertised in the lock content
 * @throws KeeperException
 *           declared for ZooKeeper errors (presumably raised by the lock attempt)
 * @throws InterruptedException
 *           declared for thread interruption (presumably raised by the lock attempt)
 */
private void getZooLock(HostAndPort addr) throws KeeperException, InterruptedException {
    var path = ServiceLock.path(getContext().getZooKeeperRoot() + Constants.ZGC_LOCK);
    LockWatcher lockWatcher = new LockWatcher() {

        @Override
        public void lostLock(LockLossReason reason) {
            Halt.halt("GC lock in zookeeper lost (reason = " + reason + "), exiting!", 1);
        }

        @Override
        public void unableToMonitorLockNode(final Exception e) {
            // ACCUMULO-3651 Level changed to error and FATAL added to message for slf4j compatibility
            Halt.halt(-1, () -> log.error("FATAL: No longer able to monitor lock node ", e));
        }
    };
    UUID zooLockUUID = UUID.randomUUID();
    // Encode explicitly as UTF-8 so the advertised lock content does not depend on the JVM's
    // platform default charset. The content is the same for every attempt, so build it once.
    byte[] lockContent = new ServerServices(addr.toString(), Service.GC_CLIENT).toString()
        .getBytes(java.nio.charset.StandardCharsets.UTF_8);
    while (true) {
        ServiceLock lock = new ServiceLock(getContext().getZooReaderWriter().getZooKeeper(), path, zooLockUUID);
        if (lock.tryLock(lockWatcher, lockContent)) {
            log.debug("Got GC ZooKeeper lock");
            return;
        }
        log.debug("Failed to get GC ZooKeeper lock, will retry");
        sleepUninterruptibly(1, TimeUnit.SECONDS);
    }
}
Also used : ServerServices(org.apache.accumulo.core.util.ServerServices) ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) LockWatcher(org.apache.accumulo.fate.zookeeper.ServiceLock.LockWatcher) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason) UUID(java.util.UUID) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) FileNotFoundException(java.io.FileNotFoundException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException)

Example 5 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason in project accumulo by apache.

the class TabletServer method announceExistence.

/**
 * Registers this tablet server in ZooKeeper by creating its node under the tablet servers path
 * and obtaining a service lock on it.
 *
 * <p>
 * Up to {@code 120 / 5} lock attempts are made with a 5 second pause after each failed attempt.
 * On success the serialized lock id is stored in {@code lockID}. Any failure, including running
 * out of attempts, is logged and rethrown wrapped in a {@link RuntimeException}. If the lock is
 * lost afterwards the process halts with status 0 when a stop had been requested and 1 otherwise;
 * losing the ability to monitor the lock halts with status 1.
 */
private void announceExistence() {
    final ZooReaderWriter zrw = getContext().getZooReaderWriter();
    try {
        final var lockPath = ServiceLock.path(getContext().getZooKeeperRoot() + Constants.ZTSERVERS + "/" + getClientAddressString());
        try {
            zrw.putPersistentData(lockPath.toString(), new byte[0], NodeExistsPolicy.SKIP);
        } catch (KeeperException ke) {
            // give the operator a hint for the auth failure case, then rethrow regardless
            if (KeeperException.Code.NOAUTH == ke.code()) {
                log.error("Failed to write to ZooKeeper. Ensure that" + " accumulo.properties, specifically instance.secret, is consistent.");
            }
            throw ke;
        }

        tabletServerLock = new ServiceLock(zrw.getZooKeeper(), lockPath, UUID.randomUUID());

        final LockWatcher watcher = new LockWatcher() {

            @Override
            public void lostLock(final LockLossReason reason) {
                // a requested stop is a clean exit; anything else is an error
                final int status = serverStopRequested ? 0 : 1;
                Halt.halt(status, () -> {
                    if (!serverStopRequested) {
                        log.error("Lost tablet server lock (reason = {}), exiting.", reason);
                    }
                    gcLogger.logGCInfo(getConfiguration());
                });
            }

            @Override
            public void unableToMonitorLockNode(final Exception e) {
                Halt.halt(1, () -> log.error("Lost ability to monitor tablet server lock, exiting.", e));
            }
        };

        final byte[] lockData = new ServerServices(getClientAddressString(), Service.TSERV_CLIENT).toString().getBytes(UTF_8);
        final int maxAttempts = 120 / 5;
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            // recreate the node before each attempt; SKIP leaves an existing node alone
            zrw.putPersistentData(lockPath.toString(), new byte[] {}, NodeExistsPolicy.SKIP);
            if (!tabletServerLock.tryLock(watcher, lockData)) {
                log.info("Waiting for tablet server lock");
                sleepUninterruptibly(5, TimeUnit.SECONDS);
                continue;
            }
            log.debug("Obtained tablet server lock {}", tabletServerLock.getLockPath());
            lockID = tabletServerLock.getLockID().serialize(getContext().getZooKeeperRoot() + Constants.ZTSERVERS + "/");
            return;
        }

        final String giveUp = "Too many retries, exiting.";
        log.info(giveUp);
        throw new RuntimeException(giveUp);
    } catch (Exception failure) {
        log.info("Could not obtain tablet server lock, exiting.", failure);
        throw new RuntimeException(failure);
    }
}
Also used : ServerServices(org.apache.accumulo.core.util.ServerServices) ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) LockWatcher(org.apache.accumulo.fate.zookeeper.ServiceLock.LockWatcher) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason) KeeperException(org.apache.zookeeper.KeeperException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) TException(org.apache.thrift.TException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

ServiceLock (org.apache.accumulo.fate.zookeeper.ServiceLock)5 LockLossReason (org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason)5 LockWatcher (org.apache.accumulo.fate.zookeeper.ServiceLock.LockWatcher)5 ServerServices (org.apache.accumulo.core.util.ServerServices)4 ZooReaderWriter (org.apache.accumulo.fate.zookeeper.ZooReaderWriter)4 IOException (java.io.IOException)3 KeeperException (org.apache.zookeeper.KeeperException)3 UnknownHostException (java.net.UnknownHostException)2 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)2 TException (org.apache.thrift.TException)2 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 FileNotFoundException (java.io.FileNotFoundException)1 UUID (java.util.UUID)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 ThriftSecurityException (org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException)1 ThriftTableOperationException (org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException)1 UnknownCompactionIdException (org.apache.accumulo.core.compaction.thrift.UnknownCompactionIdException)1 Iface (org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Iface)1 Processor (org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Processor)1