Search in sources :

Example 1 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason in project accumulo by apache.

the class ZombieTServer method main.

/**
 * Starts a Thrift tablet-client service on a randomly chosen port, registers its address under
 * the tablet-server node in ZooKeeper, tries to take the matching lock, and then blocks until
 * the client handler reports that it has been halted, at which point the JVM exits with status 0.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if server startup, a ZooKeeper call, or the wait on the handler fails
 */
public static void main(String[] args) throws Exception {
    // The port lands in [2000, 32000); the seed is the current time in milliseconds modulo 1000.
    Random portPicker = new Random(System.currentTimeMillis() % 1000);
    int port = 2000 + portPicker.nextInt(30000);

    Instance instance = HdfsZooInstance.getInstance();
    ServerConfigurationFactory confFactory = new ServerConfigurationFactory(instance);
    AccumuloServerContext context = new AccumuloServerContext(instance, confFactory);
    final ThriftClientHandler handler = new ThriftClientHandler(context, new TransactionWatcher());
    Processor<Iface> processor = new Processor<>(handler);

    ServerAddress serverPort = TServerUtils.startTServer(
            context.getConfiguration(),
            ThriftServerType.CUSTOM_HS_HA,
            processor,
            "ZombieTServer",
            "walking dead",
            2,
            1,
            1000,
            10 * 1024 * 1024,
            null,
            null,
            -1,
            HostAndPort.fromParts("0.0.0.0", port));
    String addressString = serverPort.address.toString();

    // Make sure the node for this server exists before a lock is requested beneath it.
    String zPath = ZooUtil.getRoot(context.getInstance()) + Constants.ZTSERVERS + "/" + addressString;
    ZooReaderWriter.getInstance().putPersistentData(zPath, new byte[0], NodeExistsPolicy.SKIP);
    ZooLock zlock = new ZooLock(zPath);

    LockWatcher haltOnLockTrouble = new LockWatcher() {

        @Override
        public void lostLock(final LockLossReason reason) {
            haltHandler();
        }

        @Override
        public void unableToMonitorLockNode(Throwable e) {
            haltHandler();
        }

        // Both callbacks react the same way: halt the handler, and exit the JVM if that throws.
        private void haltHandler() {
            try {
                handler.halt(Tracer.traceInfo(), null, null);
            } catch (Exception ex) {
                log.error("Exception", ex);
                System.exit(1);
            }
        }
    };

    byte[] lockContent = new ServerServices(addressString, Service.TSERV_CLIENT).toString().getBytes(UTF_8);
    boolean locked = zlock.tryLock(haltOnLockTrouble, lockContent);
    if (locked) {
        log.debug("Obtained tablet server lock {}", zlock.getLockPath());
    }

    // Park the main thread until the handler flags itself as halted, then exit normally.
    synchronized (handler) {
        while (!handler.halted) {
            handler.wait();
        }
    }
    System.exit(0);
}
Also used : AccumuloServerContext(org.apache.accumulo.server.AccumuloServerContext) Processor(org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Processor) ServerServices(org.apache.accumulo.core.util.ServerServices) Instance(org.apache.accumulo.core.client.Instance) HdfsZooInstance(org.apache.accumulo.server.client.HdfsZooInstance) ServerAddress(org.apache.accumulo.server.rpc.ServerAddress) ZooReaderWriter(org.apache.accumulo.server.zookeeper.ZooReaderWriter) ServerConfigurationFactory(org.apache.accumulo.server.conf.ServerConfigurationFactory) TException(org.apache.thrift.TException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) Iface(org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Iface) TransactionWatcher(org.apache.accumulo.server.zookeeper.TransactionWatcher) Random(java.util.Random) LockWatcher(org.apache.accumulo.fate.zookeeper.ZooLock.LockWatcher) LockLossReason(org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason) ZooLock(org.apache.accumulo.server.zookeeper.ZooLock)

Example 2 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason in project accumulo by apache.

the class SplitRecoveryIT method run.

/**
 * Attempts to take a test lock in ZooKeeper and then runs the split-recovery scenarios in a
 * fixed order. A failed lock attempt is only reported on stderr; the scenarios still run.
 *
 * @throws Exception if a ZooKeeper call or any of the scenarios fails
 */
private void run() throws Exception {
    Instance instance = HdfsZooInstance.getInstance();
    AccumuloServerContext context = new AccumuloServerContext(instance, new ServerConfigurationFactory(instance));

    String lockPath = ZooUtil.getRoot(instance) + "/testLock";
    IZooReaderWriter zk = ZooReaderWriter.getInstance();
    zk.putPersistentData(lockPath, new byte[0], NodeExistsPolicy.OVERWRITE);
    ZooLock lock = new ZooLock(lockPath);

    // Any trouble with the lock terminates the JVM with status -1.
    LockWatcher exitOnLockTrouble = new LockWatcher() {

        @Override
        public void lostLock(LockLossReason reason) {
            System.exit(-1);
        }

        @Override
        public void unableToMonitorLockNode(Throwable e) {
            System.exit(-1);
        }
    };
    if (!lock.tryLock(exitOnLockTrouble, "foo".getBytes(UTF_8))) {
        System.err.println("Failed to get lock " + lockPath);
    }

    // Tables with a single tablet.
    runSplitRecoveryTest(context, 0, "sp", 0, lock, nke("foo0", null, null));
    runSplitRecoveryTest(context, 1, "sp", 0, lock, nke("foo1", null, null));

    // Tables with two tablets: exercise the first tablet, then the last.
    runSplitRecoveryTest(context, 0, "k", 0, lock, nke("foo2", "m", null), nke("foo2", null, "m"));
    runSplitRecoveryTest(context, 1, "k", 0, lock, nke("foo3", "m", null), nke("foo3", null, "m"));
    runSplitRecoveryTest(context, 0, "o", 1, lock, nke("foo4", "m", null), nke("foo4", null, "m"));
    runSplitRecoveryTest(context, 1, "o", 1, lock, nke("foo5", "m", null), nke("foo5", null, "m"));

    // Tables with three tablets: exercise the middle tablet.
    runSplitRecoveryTest(context, 0, "o", 1, lock, nke("foo6", "m", null), nke("foo6", "r", "m"), nke("foo6", null, "r"));
    runSplitRecoveryTest(context, 1, "o", 1, lock, nke("foo7", "m", null), nke("foo7", "r", "m"), nke("foo7", null, "r"));

    // Tables with three tablets: exercise the first tablet.
    runSplitRecoveryTest(context, 0, "g", 0, lock, nke("foo8", "m", null), nke("foo8", "r", "m"), nke("foo8", null, "r"));
    runSplitRecoveryTest(context, 1, "g", 0, lock, nke("foo9", "m", null), nke("foo9", "r", "m"), nke("foo9", null, "r"));

    // Tables with three tablets: exercise the last tablet.
    runSplitRecoveryTest(context, 0, "w", 2, lock, nke("fooa", "m", null), nke("fooa", "r", "m"), nke("fooa", null, "r"));
    runSplitRecoveryTest(context, 1, "w", 2, lock, nke("foob", "m", null), nke("foob", "r", "m"), nke("foob", null, "r"));
}
Also used : AccumuloServerContext(org.apache.accumulo.server.AccumuloServerContext) Instance(org.apache.accumulo.core.client.Instance) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance) HdfsZooInstance(org.apache.accumulo.server.client.HdfsZooInstance) IZooReaderWriter(org.apache.accumulo.fate.zookeeper.IZooReaderWriter) LockWatcher(org.apache.accumulo.fate.zookeeper.ZooLock.LockWatcher) ServerConfigurationFactory(org.apache.accumulo.server.conf.ServerConfigurationFactory) LockLossReason(org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason) ZooLock(org.apache.accumulo.server.zookeeper.ZooLock)

Example 3 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason in project accumulo by apache.

the class SimpleGarbageCollector method getZooLock.

/**
 * Blocks until this process holds the garbage collector lock in ZooKeeper.
 *
 * <p>A fresh {@code ZooLock} is created and tried once per second until the attempt succeeds.
 * The lock content is the serialized {@code ServerServices} entry for {@code addr}, encoded as
 * UTF-8 so the stored bytes do not depend on the JVM's default charset.
 *
 * @param addr address advertised in the lock content as the GC client service
 * @throws KeeperException declared for the ZooKeeper lock calls
 * @throws InterruptedException declared for the ZooKeeper lock calls
 */
private void getZooLock(HostAndPort addr) throws KeeperException, InterruptedException {
    String path = ZooUtil.getRoot(getInstance()) + Constants.ZGC_LOCK;
    LockWatcher lockWatcher = new LockWatcher() {

        @Override
        public void lostLock(LockLossReason reason) {
            Halt.halt("GC lock in zookeeper lost (reason = " + reason + "), exiting!", 1);
        }

        @Override
        public void unableToMonitorLockNode(final Throwable e) {
            // ACCUMULO-3651 Level changed to error and FATAL added to message for slf4j compatibility
            Halt.halt(-1, new Runnable() {

                @Override
                public void run() {
                    log.error("FATAL: No longer able to monitor lock node ", e);
                }
            });
        }
    };
    // The payload is the same for every attempt, so build it once with an explicit charset.
    byte[] lockContent = new ServerServices(addr.toString(), Service.GC_CLIENT).toString()
            .getBytes(java.nio.charset.StandardCharsets.UTF_8);
    while (true) {
        lock = new ZooLock(path);
        if (lock.tryLock(lockWatcher, lockContent)) {
            log.debug("Got GC ZooKeeper lock");
            return;
        }
        log.debug("Failed to get GC ZooKeeper lock, will retry");
        sleepUninterruptibly(1, TimeUnit.SECONDS);
    }
}
Also used : ServerServices(org.apache.accumulo.core.util.ServerServices) LockWatcher(org.apache.accumulo.fate.zookeeper.ZooLock.LockWatcher) LockLossReason(org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason) ZooLock(org.apache.accumulo.server.zookeeper.ZooLock)

Example 4 with LockLossReason

use of org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason in project accumulo by apache.

the class TabletServer method announceExistence.

/**
 * Creates this tablet server's node in ZooKeeper and tries to take the lock on it.
 *
 * <p>Up to 24 attempts are made, 5 seconds apart. On success the serialized lock ID is stored in
 * {@code lockID} and the method returns. Any failure, including running out of attempts, is
 * logged and rethrown wrapped in a {@link RuntimeException}.
 */
private void announceExistence() {
    IZooReaderWriter zk = ZooReaderWriter.getInstance();
    try {
        String zPath = ZooUtil.getRoot(getInstance()) + Constants.ZTSERVERS + "/" + getClientAddressString();
        try {
            zk.putPersistentData(zPath, new byte[] {}, NodeExistsPolicy.SKIP);
        } catch (KeeperException e) {
            // Log a hint pointing at the instance secret for NOAUTH, then rethrow unchanged.
            if (e.code() == KeeperException.Code.NOAUTH) {
                log.error("Failed to write to ZooKeeper. Ensure that accumulo-site.xml, specifically instance.secret, is consistent.");
            }
            throw e;
        }

        tabletServerLock = new ZooLock(zPath);

        LockWatcher watcher = new LockWatcher() {

            @Override
            public void lostLock(final LockLossReason reason) {
                // Exit status is 0 when a stop was requested, 1 otherwise.
                Halt.halt(serverStopRequested ? 0 : 1, new Runnable() {

                    @Override
                    public void run() {
                        if (!serverStopRequested) {
                            log.error("Lost tablet server lock (reason = {}), exiting.", reason);
                        }
                        gcLogger.logGCInfo(getConfiguration());
                    }
                });
            }

            @Override
            public void unableToMonitorLockNode(final Throwable e) {
                Halt.halt(1, new Runnable() {

                    @Override
                    public void run() {
                        log.error("Lost ability to monitor tablet server lock, exiting.", e);
                    }
                });
            }
        };

        byte[] lockContent = new ServerServices(getClientAddressString(), Service.TSERV_CLIENT).toString().getBytes(UTF_8);

        final int maxAttempts = 120 / 5;
        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            // Re-create the node on every attempt (SKIP leaves an existing node alone).
            zk.putPersistentData(zPath, new byte[0], NodeExistsPolicy.SKIP);

            if (!tabletServerLock.tryLock(watcher, lockContent)) {
                log.info("Waiting for tablet server lock");
                sleepUninterruptibly(5, TimeUnit.SECONDS);
                continue;
            }

            log.debug("Obtained tablet server lock {}", tabletServerLock.getLockPath());
            lockID = tabletServerLock.getLockID().serialize(ZooUtil.getRoot(getInstance()) + Constants.ZTSERVERS + "/");
            return;
        }

        // Out of attempts; this exception is also caught and wrapped by the handler below.
        String msg = "Too many retries, exiting.";
        log.info(msg);
        throw new RuntimeException(msg);
    } catch (Exception e) {
        log.info("Could not obtain tablet server lock, exiting.", e);
        throw new RuntimeException(e);
    }
}
Also used : ServerServices(org.apache.accumulo.core.util.ServerServices) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) TSampleNotPresentException(org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) ConstraintViolationException(org.apache.accumulo.core.tabletserver.thrift.ConstraintViolationException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) NotServingTabletException(org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) NoSuchScanIDException(org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException) CancellationException(java.util.concurrent.CancellationException) DistributedStoreException(org.apache.accumulo.server.master.state.DistributedStoreException) TException(org.apache.thrift.TException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) BadLocationStateException(org.apache.accumulo.server.master.state.TabletLocationState.BadLocationStateException) TimeoutException(java.util.concurrent.TimeoutException) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) SampleNotPresentException(org.apache.accumulo.core.client.SampleNotPresentException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) IZooReaderWriter(org.apache.accumulo.fate.zookeeper.IZooReaderWriter) 
LockWatcher(org.apache.accumulo.fate.zookeeper.ZooLock.LockWatcher) LoggingRunnable(org.apache.accumulo.fate.util.LoggingRunnable) LockLossReason(org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason) KeeperException(org.apache.zookeeper.KeeperException) ZooLock(org.apache.accumulo.server.zookeeper.ZooLock)

Aggregations

LockLossReason (org.apache.accumulo.fate.zookeeper.ZooLock.LockLossReason)4 LockWatcher (org.apache.accumulo.fate.zookeeper.ZooLock.LockWatcher)4 ZooLock (org.apache.accumulo.server.zookeeper.ZooLock)4 ServerServices (org.apache.accumulo.core.util.ServerServices)3 Instance (org.apache.accumulo.core.client.Instance)2 ThriftSecurityException (org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException)2 IZooReaderWriter (org.apache.accumulo.fate.zookeeper.IZooReaderWriter)2 AccumuloServerContext (org.apache.accumulo.server.AccumuloServerContext)2 HdfsZooInstance (org.apache.accumulo.server.client.HdfsZooInstance)2 ServerConfigurationFactory (org.apache.accumulo.server.conf.ServerConfigurationFactory)2 TException (org.apache.thrift.TException)2 IOException (java.io.IOException)1 UnknownHostException (java.net.UnknownHostException)1 Random (java.util.Random)1 CancellationException (java.util.concurrent.CancellationException)1 ExecutionException (java.util.concurrent.ExecutionException)1 TimeoutException (java.util.concurrent.TimeoutException)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 SampleNotPresentException (org.apache.accumulo.core.client.SampleNotPresentException)1