Search in sources :

Example 76 with KeeperException

use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project hive by apache.

the class HiveServer2 method addServerInstanceToZooKeeper.

/**
 * Adds a server instance to ZooKeeper as a znode.
 *
 * @param hiveConf
 * @throws Exception
 */
private void addServerInstanceToZooKeeper(HiveConf hiveConf, Map<String, String> confsToPublish) throws Exception {
    String zooKeeperEnsemble = ZooKeeperHiveHelper.getQuorumServers(hiveConf);
    String rootNamespace = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE);
    String instanceURI = getServerInstanceURI();
    setUpZooKeeperAuth(hiveConf);
    int sessionTimeout = (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
    int baseSleepTime = (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME, TimeUnit.MILLISECONDS);
    int maxRetries = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_MAX_RETRIES);
    // Create a CuratorFramework instance to be used as the ZooKeeper client
    // Use the zooKeeperAclProvider to create appropriate ACLs
    zooKeeperClient = CuratorFrameworkFactory.builder().connectString(zooKeeperEnsemble).sessionTimeoutMs(sessionTimeout).aclProvider(zooKeeperAclProvider).retryPolicy(new ExponentialBackoffRetry(baseSleepTime, maxRetries)).build();
    zooKeeperClient.start();
    // Create the parent znodes recursively; ignore if the parent already exists.
    try {
        zooKeeperClient.create().creatingParentsIfNeeded().withMode(CreateMode.PERSISTENT).forPath(ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace);
        LOG.info("Created the root name space: " + rootNamespace + " on ZooKeeper for HiveServer2");
    } catch (KeeperException e) {
        if (e.code() != KeeperException.Code.NODEEXISTS) {
            LOG.error("Unable to create HiveServer2 namespace: " + rootNamespace + " on ZooKeeper", e);
            throw e;
        }
    }
    // Znode name: serverUri=host:port;version=versionInfo;sequence=sequenceNumber
    try {
        String pathPrefix = ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + "serverUri=" + instanceURI + ";" + "version=" + HiveVersionInfo.getVersion() + ";" + "sequence=";
        String znodeData = "";
        if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_PUBLISH_CONFIGS)) {
            // HiveServer2 configs that this instance will publish to ZooKeeper,
            // so that the clients can read these and configure themselves properly.
            addConfsToPublish(hiveConf, confsToPublish, instanceURI);
            // Publish configs for this instance as the data on the node
            znodeData = Joiner.on(';').withKeyValueSeparator("=").join(confsToPublish);
        } else {
            znodeData = instanceURI;
        }
        byte[] znodeDataUTF8 = znodeData.getBytes(Charset.forName("UTF-8"));
        znode = new PersistentEphemeralNode(zooKeeperClient, PersistentEphemeralNode.Mode.EPHEMERAL_SEQUENTIAL, pathPrefix, znodeDataUTF8);
        znode.start();
        // We'll wait for 120s for node creation
        long znodeCreationTimeout = 120;
        if (!znode.waitForInitialCreate(znodeCreationTimeout, TimeUnit.SECONDS)) {
            throw new Exception("Max znode creation wait time: " + znodeCreationTimeout + "s exhausted");
        }
        setDeregisteredWithZooKeeper(false);
        final String znodePath = znode.getActualPath();
        // Set a watch on the znode
        if (zooKeeperClient.checkExists().usingWatcher(new DeRegisterWatcher()).forPath(znodePath) == null) {
            // No node exists, throw exception
            throw new Exception("Unable to create znode for this HiveServer2 instance on ZooKeeper.");
        }
        LOG.info("Created a znode on ZooKeeper for HiveServer2 uri: " + instanceURI);
    } catch (Exception e) {
        LOG.error("Unable to create a znode for this server instance", e);
        if (znode != null) {
            znode.close();
        }
        throw (e);
    }
}
Also used : ExponentialBackoffRetry(org.apache.curator.retry.ExponentialBackoffRetry) PersistentEphemeralNode(org.apache.curator.framework.recipes.nodes.PersistentEphemeralNode) KeeperException(org.apache.zookeeper.KeeperException) ServiceException(org.apache.hive.service.ServiceException) LogInitializationException(org.apache.hadoop.hive.common.LogUtils.LogInitializationException) IOException(java.io.IOException) ParseException(org.apache.commons.cli.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KeeperException(org.apache.zookeeper.KeeperException)

Example 77 with KeeperException

use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project fabric8 by jboss-fuse.

the class PrepRequestProcessor method pRequest.

/**
 * This method will be called inside the ProcessRequestThread, which is a
 * singleton, so there will be a single thread calling this code.
 *
 * @param request
 */
@SuppressWarnings("unchecked")
protected void pRequest(Request request) throws RequestProcessorException {
    // LOG.info("Prep>>> cxid = " + request.cxid + " type = " +
    // request.type + " id = 0x" + Long.toHexString(request.sessionId));
    request.hdr = null;
    request.txn = null;
    try {
        switch(request.type) {
            case OpCode.create:
                CreateRequest createRequest = new CreateRequest();
                pRequest2Txn(request.type, zks.getNextZxid(), request, createRequest, true);
                break;
            case OpCode.delete:
                DeleteRequest deleteRequest = new DeleteRequest();
                pRequest2Txn(request.type, zks.getNextZxid(), request, deleteRequest, true);
                break;
            case OpCode.setData:
                SetDataRequest setDataRequest = new SetDataRequest();
                pRequest2Txn(request.type, zks.getNextZxid(), request, setDataRequest, true);
                break;
            case OpCode.setACL:
                SetACLRequest setAclRequest = new SetACLRequest();
                pRequest2Txn(request.type, zks.getNextZxid(), request, setAclRequest, true);
                break;
            case OpCode.check:
                CheckVersionRequest checkRequest = new CheckVersionRequest();
                pRequest2Txn(request.type, zks.getNextZxid(), request, checkRequest, true);
                break;
            case OpCode.multi:
                MultiTransactionRecord multiRequest = new MultiTransactionRecord();
                try {
                    ByteBufferInputStream.byteBuffer2Record(request.request, multiRequest);
                } catch (IOException e) {
                    request.hdr = new TxnHeader(request.sessionId, request.cxid, zks.getNextZxid(), zks.getTime(), OpCode.multi);
                    throw e;
                }
                List<Txn> txns = new ArrayList<Txn>();
                // Each op in a multi-op must have the same zxid!
                long zxid = zks.getNextZxid();
                KeeperException ke = null;
                // Store off current pending change records in case we need to rollback
                HashMap<String, ChangeRecord> pendingChanges = getPendingChanges(multiRequest);
                int index = 0;
                for (Op op : multiRequest) {
                    Record subrequest = op.toRequestRecord();
                    /* If we've already failed one of the ops, don't bother
                     * trying the rest as we know it's going to fail and it
                     * would be confusing in the logfiles.
                     */
                    if (ke != null) {
                        request.hdr.setType(OpCode.error);
                        request.txn = new ErrorTxn(Code.RUNTIMEINCONSISTENCY.intValue());
                    } else /* Prep the request and convert to a Txn */
                    {
                        try {
                            pRequest2Txn(op.getType(), zxid, request, subrequest, false);
                        } catch (KeeperException e) {
                            if (ke == null) {
                                ke = e;
                            }
                            request.hdr.setType(OpCode.error);
                            request.txn = new ErrorTxn(e.code().intValue());
                            if (!(request.type == OpCode.exists)) {
                                // INFO log only if we're not asking for existence of a node
                                // as this is absolutely normal to ask if a node exists and it doesn't exist
                                LOG.info("Got user-level KeeperException when processing " + request.toString() + " aborting remaining multi ops." + " Error Path:" + e.getPath() + " Error:" + e.getMessage());
                            }
                            request.setException(e);
                            /* Rollback change records from failed multi-op */
                            rollbackPendingChanges(zxid, pendingChanges);
                        }
                    }
                    // FIXME: I don't want to have to serialize it here and then
                    // immediately deserialize in next processor. But I'm
                    // not sure how else to get the txn stored into our list.
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    BinaryOutputArchive boa = BinaryOutputArchive.getArchive(baos);
                    request.txn.serialize(boa, "request");
                    ByteBuffer bb = ByteBuffer.wrap(baos.toByteArray());
                    txns.add(new Txn(request.hdr.getType(), bb.array()));
                    index++;
                }
                request.hdr = new TxnHeader(request.sessionId, request.cxid, zxid, zks.getTime(), request.type);
                request.txn = new MultiTxn(txns);
                break;
            // create/close session don't require request record
            case OpCode.createSession:
            case OpCode.closeSession:
                pRequest2Txn(request.type, zks.getNextZxid(), request, null, true);
                break;
            // All the rest don't need to create a Txn - just verify session
            case OpCode.sync:
            case OpCode.exists:
            case OpCode.getData:
            case OpCode.getACL:
            case OpCode.getChildren:
            case OpCode.getChildren2:
            case OpCode.ping:
            case OpCode.setWatches:
                zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
                break;
        }
    } catch (KeeperException e) {
        if (request.hdr != null) {
            request.hdr.setType(OpCode.error);
            request.txn = new ErrorTxn(e.code().intValue());
        }
        LOG.info("Got user-level KeeperException when processing " + request.toString() + " Error Path:" + e.getPath() + " Error:" + e.getMessage());
        request.setException(e);
    } catch (Exception e) {
        // log at error level as we are returning a marshalling
        // error to the user
        LOG.error("Failed to process " + request, e);
        StringBuilder sb = new StringBuilder();
        ByteBuffer bb = request.request;
        if (bb != null) {
            bb.rewind();
            while (bb.hasRemaining()) {
                sb.append(Integer.toHexString(bb.get() & 0xff));
            }
        } else {
            sb.append("request buffer is null");
        }
        LOG.error("Dumping request buffer: 0x" + sb.toString());
        if (request.hdr != null) {
            request.hdr.setType(OpCode.error);
            request.txn = new ErrorTxn(Code.MARSHALLINGERROR.intValue());
        }
    }
    request.zxid = zks.getZxid();
    nextProcessor.processRequest(request);
}
Also used : Op(org.apache.zookeeper.Op) BinaryOutputArchive(org.apache.jute.BinaryOutputArchive) CheckVersionRequest(org.apache.zookeeper.proto.CheckVersionRequest) MultiTxn(org.apache.zookeeper.txn.MultiTxn) CreateRequest(org.apache.zookeeper.proto.CreateRequest) ArrayList(java.util.ArrayList) SetACLTxn(org.apache.zookeeper.txn.SetACLTxn) SetDataTxn(org.apache.zookeeper.txn.SetDataTxn) CheckVersionTxn(org.apache.zookeeper.txn.CheckVersionTxn) CreateSessionTxn(org.apache.zookeeper.txn.CreateSessionTxn) CreateTxn(org.apache.zookeeper.txn.CreateTxn) Txn(org.apache.zookeeper.txn.Txn) MultiTxn(org.apache.zookeeper.txn.MultiTxn) ErrorTxn(org.apache.zookeeper.txn.ErrorTxn) DeleteTxn(org.apache.zookeeper.txn.DeleteTxn) MultiTransactionRecord(org.apache.zookeeper.MultiTransactionRecord) Record(org.apache.jute.Record) ChangeRecord(org.apache.zookeeper.server.ZooKeeperServer.ChangeRecord) SetACLRequest(org.apache.zookeeper.proto.SetACLRequest) SetDataRequest(org.apache.zookeeper.proto.SetDataRequest) IOException(java.io.IOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteBuffer(java.nio.ByteBuffer) XidRolloverException(org.apache.zookeeper.server.quorum.Leader.XidRolloverException) BadArgumentsException(org.apache.zookeeper.KeeperException.BadArgumentsException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) MultiTransactionRecord(org.apache.zookeeper.MultiTransactionRecord) ErrorTxn(org.apache.zookeeper.txn.ErrorTxn) DeleteRequest(org.apache.zookeeper.proto.DeleteRequest) ChangeRecord(org.apache.zookeeper.server.ZooKeeperServer.ChangeRecord) KeeperException(org.apache.zookeeper.KeeperException) TxnHeader(org.apache.zookeeper.txn.TxnHeader)

Example 78 with KeeperException

use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project accumulo by apache.

the class MiniAccumuloClusterImpl method start.

/**
 * Starts Accumulo and Zookeeper processes. Can only be called once.
 */
@Override
public synchronized void start() throws IOException, InterruptedException {
    if (config.useMiniDFS() && miniDFS == null) {
        throw new IllegalStateException("Cannot restart mini when using miniDFS");
    }
    MiniAccumuloClusterControl control = getClusterControl();
    if (config.useExistingInstance()) {
        Configuration acuConf = config.getAccumuloConfiguration();
        Configuration hadoopConf = config.getHadoopConfiguration();
        ConfigurationCopy cc = new ConfigurationCopy(acuConf);
        VolumeManager fs;
        try {
            fs = VolumeManagerImpl.get(cc, hadoopConf);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        Path instanceIdPath = Accumulo.getAccumuloInstanceIdPath(fs);
        String instanceIdFromFile = ZooUtil.getInstanceIDFromHdfs(instanceIdPath, cc, hadoopConf);
        IZooReaderWriter zrw = new ZooReaderWriterFactory().getZooReaderWriter(cc.get(Property.INSTANCE_ZK_HOST), (int) cc.getTimeInMillis(Property.INSTANCE_ZK_TIMEOUT), cc.get(Property.INSTANCE_SECRET));
        String rootPath = ZooUtil.getRoot(instanceIdFromFile);
        String instanceName = null;
        try {
            for (String name : zrw.getChildren(Constants.ZROOT + Constants.ZINSTANCES)) {
                String instanceNamePath = Constants.ZROOT + Constants.ZINSTANCES + "/" + name;
                byte[] bytes = zrw.getData(instanceNamePath, new Stat());
                String iid = new String(bytes, UTF_8);
                if (iid.equals(instanceIdFromFile)) {
                    instanceName = name;
                }
            }
        } catch (KeeperException e) {
            throw new RuntimeException("Unable to read instance name from zookeeper.", e);
        }
        if (instanceName == null)
            throw new RuntimeException("Unable to read instance name from zookeeper.");
        config.setInstanceName(instanceName);
        if (!AccumuloStatus.isAccumuloOffline(zrw, rootPath))
            throw new RuntimeException("The Accumulo instance being used is already running. Aborting.");
    } else {
        if (!initialized) {
            Runtime.getRuntime().addShutdownHook(new Thread() {

                @Override
                public void run() {
                    try {
                        MiniAccumuloClusterImpl.this.stop();
                    } catch (IOException e) {
                        log.error("IOException while attempting to stop the MiniAccumuloCluster.", e);
                    } catch (InterruptedException e) {
                        log.error("The stopping of MiniAccumuloCluster was interrupted.", e);
                    }
                }
            });
        }
        if (!config.useExistingZooKeepers())
            control.start(ServerType.ZOOKEEPER);
        if (!initialized) {
            if (!config.useExistingZooKeepers()) {
                // sleep a little bit to let zookeeper come up before calling init, seems to work better
                long startTime = System.currentTimeMillis();
                while (true) {
                    Socket s = null;
                    try {
                        s = new Socket("localhost", config.getZooKeeperPort());
                        s.setReuseAddress(true);
                        s.getOutputStream().write("ruok\n".getBytes());
                        s.getOutputStream().flush();
                        byte[] buffer = new byte[100];
                        int n = s.getInputStream().read(buffer);
                        if (n >= 4 && new String(buffer, 0, 4).equals("imok"))
                            break;
                    } catch (Exception e) {
                        if (System.currentTimeMillis() - startTime >= config.getZooKeeperStartupTime()) {
                            throw new ZooKeeperBindException("Zookeeper did not start within " + (config.getZooKeeperStartupTime() / 1000) + " seconds. Check the logs in " + config.getLogDir() + " for errors.  Last exception: " + e);
                        }
                        // Don't spin absurdly fast
                        Thread.sleep(250);
                    } finally {
                        if (s != null)
                            s.close();
                    }
                }
            }
            LinkedList<String> args = new LinkedList<>();
            args.add("--instance-name");
            args.add(config.getInstanceName());
            args.add("--user");
            args.add(config.getRootUserName());
            args.add("--clear-instance-name");
            // If we aren't using SASL, add in the root password
            final String saslEnabled = config.getSiteConfig().get(Property.INSTANCE_RPC_SASL_ENABLED.getKey());
            if (null == saslEnabled || !Boolean.parseBoolean(saslEnabled)) {
                args.add("--password");
                args.add(config.getRootPassword());
            }
            Process initProcess = exec(Initialize.class, args.toArray(new String[0]));
            int ret = initProcess.waitFor();
            if (ret != 0) {
                throw new RuntimeException("Initialize process returned " + ret + ". Check the logs in " + config.getLogDir() + " for errors.");
            }
            initialized = true;
        }
    }
    log.info("Starting MAC against instance {} and zookeeper(s) {}.", config.getInstanceName(), config.getZooKeepers());
    control.start(ServerType.TABLET_SERVER);
    int ret = 0;
    for (int i = 0; i < 5; i++) {
        ret = exec(Main.class, SetGoalState.class.getName(), MasterGoalState.NORMAL.toString()).waitFor();
        if (ret == 0)
            break;
        sleepUninterruptibly(1, TimeUnit.SECONDS);
    }
    if (ret != 0) {
        throw new RuntimeException("Could not set master goal state, process returned " + ret + ". Check the logs in " + config.getLogDir() + " for errors.");
    }
    control.start(ServerType.MASTER);
    control.start(ServerType.GARBAGE_COLLECTOR);
    if (null == executor) {
        executor = Executors.newSingleThreadExecutor();
    }
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) Configuration(org.apache.hadoop.conf.Configuration) DefaultConfiguration(org.apache.accumulo.core.conf.DefaultConfiguration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) ClientConfiguration(org.apache.accumulo.core.client.ClientConfiguration) ZooReaderWriterFactory(org.apache.accumulo.server.zookeeper.ZooReaderWriterFactory) Stat(org.apache.zookeeper.data.Stat) SetGoalState(org.apache.accumulo.master.state.SetGoalState) Path(org.apache.hadoop.fs.Path) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) TimeoutException(java.util.concurrent.TimeoutException) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ExecutionException(java.util.concurrent.ExecutionException) LinkedList(java.util.LinkedList) IZooReaderWriter(org.apache.accumulo.fate.zookeeper.IZooReaderWriter) KeeperException(org.apache.zookeeper.KeeperException) Socket(java.net.Socket)

Example 79 with KeeperException

use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project accumulo by apache.

the class Master method run.

public void run() throws IOException, InterruptedException, KeeperException {
    final String zroot = ZooUtil.getRoot(getInstance());
    // ACCUMULO-4424 Put up the Thrift servers before getting the lock as a sign of process health when a hot-standby
    // 
    // Start the Master's Client service
    clientHandler = new MasterClientServiceHandler(this);
    // Ensure that calls before the master gets the lock fail
    Iface haProxy = HighlyAvailableServiceWrapper.service(clientHandler, this);
    Iface rpcProxy = RpcWrapper.service(haProxy);
    final Processor<Iface> processor;
    if (ThriftServerType.SASL == getThriftServerType()) {
        Iface tcredsProxy = TCredentialsUpdatingWrapper.service(rpcProxy, clientHandler.getClass(), getConfiguration());
        processor = new Processor<>(tcredsProxy);
    } else {
        processor = new Processor<>(rpcProxy);
    }
    ServerAddress sa = TServerUtils.startServer(this, hostname, Property.MASTER_CLIENTPORT, processor, "Master", "Master Client Service Handler", null, Property.MASTER_MINTHREADS, Property.MASTER_THREADCHECK, Property.GENERAL_MAX_MESSAGE_SIZE);
    clientService = sa.server;
    log.info("Started Master client service at {}", sa.address);
    // Start the replication coordinator which assigns tservers to service replication requests
    MasterReplicationCoordinator impl = new MasterReplicationCoordinator(this);
    ReplicationCoordinator.Iface haReplicationProxy = HighlyAvailableServiceWrapper.service(impl, this);
    ReplicationCoordinator.Processor<ReplicationCoordinator.Iface> replicationCoordinatorProcessor = new ReplicationCoordinator.Processor<>(RpcWrapper.service(haReplicationProxy));
    ServerAddress replAddress = TServerUtils.startServer(this, hostname, Property.MASTER_REPLICATION_COORDINATOR_PORT, replicationCoordinatorProcessor, "Master Replication Coordinator", "Replication Coordinator", null, Property.MASTER_REPLICATION_COORDINATOR_MINTHREADS, Property.MASTER_REPLICATION_COORDINATOR_THREADCHECK, Property.GENERAL_MAX_MESSAGE_SIZE);
    log.info("Started replication coordinator service at " + replAddress.address);
    // block until we can obtain the ZK lock for the master
    getMasterLock(zroot + Constants.ZMASTER_LOCK);
    recoveryManager = new RecoveryManager(this);
    TableManager.getInstance().addObserver(this);
    StatusThread statusThread = new StatusThread();
    statusThread.start();
    MigrationCleanupThread migrationCleanupThread = new MigrationCleanupThread();
    migrationCleanupThread.start();
    tserverSet.startListeningForTabletServerChanges();
    ZooReaderWriter zReaderWriter = ZooReaderWriter.getInstance();
    zReaderWriter.getChildren(zroot + Constants.ZRECOVERY, new Watcher() {

        @Override
        public void process(WatchedEvent event) {
            nextEvent.event("Noticed recovery changes", event.getType());
            try {
                // watcher only fires once, add it back
                ZooReaderWriter.getInstance().getChildren(zroot + Constants.ZRECOVERY, this);
            } catch (Exception e) {
                log.error("Failed to add log recovery watcher back", e);
            }
        }
    });
    watchers.add(new TabletGroupWatcher(this, new MetaDataStateStore(this, this), null) {

        @Override
        boolean canSuspendTablets() {
            // Always allow user data tablets to enter suspended state.
            return true;
        }
    });
    watchers.add(new TabletGroupWatcher(this, new RootTabletStateStore(this, this), watchers.get(0)) {

        @Override
        boolean canSuspendTablets() {
            // be immediately reassigned, even if there's a global table.suspension.duration setting.
            return getConfiguration().getBoolean(Property.MASTER_METADATA_SUSPENDABLE);
        }
    });
    watchers.add(new TabletGroupWatcher(this, new ZooTabletStateStore(new ZooStore(zroot)), watchers.get(1)) {

        @Override
        boolean canSuspendTablets() {
            // Never allow root tablet to enter suspended state.
            return false;
        }
    });
    for (TabletGroupWatcher watcher : watchers) {
        watcher.start();
    }
    // Once we are sure the upgrade is complete, we can safely allow fate use.
    waitForMetadataUpgrade.await();
    try {
        final AgeOffStore<Master> store = new AgeOffStore<>(new org.apache.accumulo.fate.ZooStore<Master>(ZooUtil.getRoot(getInstance()) + Constants.ZFATE, ZooReaderWriter.getInstance()), 1000 * 60 * 60 * 8);
        int threads = getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE);
        fate = new Fate<>(this, store);
        fate.startTransactionRunners(threads);
        SimpleTimer.getInstance(getConfiguration()).schedule(new Runnable() {

            @Override
            public void run() {
                store.ageOff();
            }
        }, 63000, 63000);
    } catch (KeeperException | InterruptedException e) {
        throw new IOException(e);
    }
    ZooKeeperInitialization.ensureZooKeeperInitialized(zReaderWriter, zroot);
    // the master client service.
    if (null != authenticationTokenKeyManager && null != keyDistributor) {
        log.info("Starting delegation-token key manager");
        keyDistributor.initialize();
        authenticationTokenKeyManager.start();
        boolean logged = false;
        while (!authenticationTokenKeyManager.isInitialized()) {
            // Print out a status message when we start waiting for the key manager to get initialized
            if (!logged) {
                log.info("Waiting for AuthenticationTokenKeyManager to be initialized");
                logged = true;
            }
            sleepUninterruptibly(200, TimeUnit.MILLISECONDS);
        }
        // And log when we are initialized
        log.info("AuthenticationTokenSecretManager is initialized");
    }
    String address = sa.address.toString();
    log.info("Setting master lock data to {}", address);
    masterLock.replaceLockData(address.getBytes());
    while (!clientService.isServing()) {
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
    }
    // Start the daemon to scan the replication table and make units of work
    replicationWorkDriver = new ReplicationDriver(this);
    replicationWorkDriver.start();
    // Start the daemon to assign work to tservers to replicate to our peers
    try {
        replicationWorkAssigner = new WorkDriver(this);
    } catch (AccumuloException | AccumuloSecurityException e) {
        log.error("Caught exception trying to initialize replication WorkDriver", e);
        throw new RuntimeException(e);
    }
    replicationWorkAssigner.start();
    // Advertise that port we used so peers don't have to be told what it is
    ZooReaderWriter.getInstance().putPersistentData(ZooUtil.getRoot(getInstance()) + Constants.ZMASTER_REPLICATION_COORDINATOR_ADDR, replAddress.address.toString().getBytes(UTF_8), NodeExistsPolicy.OVERWRITE);
    // Register replication metrics
    MasterMetricsFactory factory = new MasterMetricsFactory(getConfiguration(), this);
    Metrics replicationMetrics = factory.createReplicationMetrics();
    try {
        replicationMetrics.register();
    } catch (Exception e) {
        log.error("Failed to register replication metrics", e);
    }
    // The master is fully initialized. Clients are allowed to connect now.
    masterInitialized.set(true);
    while (clientService.isServing()) {
        sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
    }
    log.info("Shutting down fate.");
    fate.shutdown();
    log.info("Shutting down timekeeping.");
    timeKeeper.shutdown();
    final long deadline = System.currentTimeMillis() + MAX_CLEANUP_WAIT_TIME;
    statusThread.join(remaining(deadline));
    replicationWorkAssigner.join(remaining(deadline));
    replicationWorkDriver.join(remaining(deadline));
    replAddress.server.stop();
    // Signal that we want it to stop, and wait for it to do so.
    if (authenticationTokenKeyManager != null) {
        authenticationTokenKeyManager.gracefulStop();
        authenticationTokenKeyManager.join(remaining(deadline));
    }
    // don't stop
    for (TabletGroupWatcher watcher : watchers) {
        watcher.join(remaining(deadline));
    }
    log.info("exiting");
}
Also used : Processor(org.apache.accumulo.core.master.thrift.MasterClientService.Processor) ServerAddress(org.apache.accumulo.server.rpc.ServerAddress) Watcher(org.apache.zookeeper.Watcher) MasterReplicationCoordinator(org.apache.accumulo.master.replication.MasterReplicationCoordinator) ReplicationCoordinator(org.apache.accumulo.core.replication.thrift.ReplicationCoordinator) WatchedEvent(org.apache.zookeeper.WatchedEvent) Iface(org.apache.accumulo.core.master.thrift.MasterClientService.Iface) Metrics(org.apache.accumulo.server.metrics.Metrics) RootTabletStateStore(org.apache.accumulo.server.master.state.RootTabletStateStore) MasterReplicationCoordinator(org.apache.accumulo.master.replication.MasterReplicationCoordinator) WorkDriver(org.apache.accumulo.master.replication.WorkDriver) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AgeOffStore(org.apache.accumulo.fate.AgeOffStore) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ZooReaderWriter(org.apache.accumulo.server.zookeeper.ZooReaderWriter) IZooReaderWriter(org.apache.accumulo.fate.zookeeper.IZooReaderWriter) ZooStore(org.apache.accumulo.server.master.state.ZooStore) IOException(java.io.IOException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NoAuthException(org.apache.zookeeper.KeeperException.NoAuthException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) TException(org.apache.thrift.TException) IOException(java.io.IOException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) TTransportException(org.apache.thrift.transport.TTransportException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) RecoveryManager(org.apache.accumulo.master.recovery.RecoveryManager) MetaDataStateStore(org.apache.accumulo.server.master.state.MetaDataStateStore) ReplicationDriver(org.apache.accumulo.master.replication.ReplicationDriver) MasterMetricsFactory(org.apache.accumulo.master.metrics.MasterMetricsFactory) ZooTabletStateStore(org.apache.accumulo.server.master.state.ZooTabletStateStore) KeeperException(org.apache.zookeeper.KeeperException)

Example 80 with KeeperException

use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project accumulo by apache.

the class TableChangeStateIT method findFate.

/**
 * Checks fates in zookeeper looking for transaction associated with a compaction as a double check that the test will be valid because the running compaction
 * does have a fate transaction lock.
 *
 * @return true if corresponding fate transaction found, false otherwise
 */
private boolean findFate(final String tableName) {
    Instance instance = connector.getInstance();
    AdminUtil<String> admin = new AdminUtil<>(false);
    try {
        Table.ID tableId = Tables.getTableId(instance, tableName);
        log.trace("tid: {}", tableId);
        String secret = cluster.getSiteConfiguration().get(Property.INSTANCE_SECRET);
        IZooReaderWriter zk = new ZooReaderWriterFactory().getZooReaderWriter(instance.getZooKeepers(), instance.getZooKeepersSessionTimeOut(), secret);
        ZooStore<String> zs = new ZooStore<>(ZooUtil.getRoot(instance) + Constants.ZFATE, zk);
        AdminUtil.FateStatus fateStatus = admin.getStatus(zs, zk, ZooUtil.getRoot(instance) + Constants.ZTABLE_LOCKS + "/" + tableId, null, null);
        for (AdminUtil.TransactionStatus tx : fateStatus.getTransactions()) {
            if (tx.getTop().contains("CompactionDriver") && tx.getDebug().contains("CompactRange")) {
                return true;
            }
        }
    } catch (KeeperException | TableNotFoundException | InterruptedException ex) {
        throw new IllegalStateException(ex);
    }
    // did not find appropriate fate transaction for compaction.
    return Boolean.FALSE;
}
Also used : Table(org.apache.accumulo.core.client.impl.Table) Instance(org.apache.accumulo.core.client.Instance) ZooStore(org.apache.accumulo.fate.ZooStore) ZooReaderWriterFactory(org.apache.accumulo.server.zookeeper.ZooReaderWriterFactory) AdminUtil(org.apache.accumulo.fate.AdminUtil) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) IZooReaderWriter(org.apache.accumulo.fate.zookeeper.IZooReaderWriter) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

KeeperException (org.apache.zookeeper.KeeperException)566 IOException (java.io.IOException)188 Stat (org.apache.zookeeper.data.Stat)127 ZooKeeper (org.apache.zookeeper.ZooKeeper)87 ArrayList (java.util.ArrayList)51 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)45 Watcher (org.apache.zookeeper.Watcher)39 WatchedEvent (org.apache.zookeeper.WatchedEvent)38 Test (org.junit.jupiter.api.Test)38 CountDownLatch (java.util.concurrent.CountDownLatch)30 SolrException (org.apache.solr.common.SolrException)30 HashMap (java.util.HashMap)29 List (java.util.List)28 ACL (org.apache.zookeeper.data.ACL)27 Test (org.junit.Test)27 HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)25 ServerName (org.apache.hadoop.hbase.ServerName)24 Map (java.util.Map)23 IZooReaderWriter (org.apache.accumulo.fate.zookeeper.IZooReaderWriter)23 InterruptedIOException (java.io.InterruptedIOException)20