Search in sources :

Example 1 with RetriesExceededException

Use of org.apache.accumulo.server.compaction.RetryableThriftCall.RetriesExceededException in the Apache Accumulo project.

The run method of the Compactor class.

/**
 * Main loop of the compactor.
 *
 * <p>Startup: starts the compactor client service, announces this compactor (the failure
 * message indicates this is a ZooKeeper registration), and initializes metrics. A failure in
 * either of the first two steps is rethrown as a {@link RuntimeException}; a metrics
 * initialization failure is only logged.
 *
 * <p>Work loop (until {@code shutdown} is set): fetch the next external compaction job, run it
 * on a dedicated thread, periodically send progress updates while it runs, then send exactly one
 * of a CANCELLED, FAILED or completion update depending on how the job ended.
 *
 * <p>Shutdown: any exception escaping the work loop is logged and NOT rethrown. In all cases the
 * Thrift server is stopped, the volume manager is closed, GC info is logged and the compactor
 * lock (if any) is released.
 */
@Override
public void run() {
    // --- Startup: these two failures are fatal and abort run() before the work loop. ---
    try {
        compactorAddress = startCompactorClientService();
    } catch (UnknownHostException e1) {
        throw new RuntimeException("Failed to start the compactor client service", e1);
    }
    final HostAndPort clientAddress = compactorAddress.getAddress();
    try {
        announceExistence(clientAddress);
    } catch (KeeperException | InterruptedException e) {
        throw new RuntimeException("Error registering compactor in ZooKeeper", e);
    }
    // Metrics are best-effort: a failure here is logged and the compactor keeps running.
    try {
        MetricsUtil.initializeMetrics(getContext().getConfiguration(), this.applicationName, clientAddress);
    } catch (Exception e1) {
        LOG.error("Error initializing metrics, metrics will not be emitted.", e1);
    }
    MetricsUtil.initializeProducers(this);
    LOG.info("Compactor started, waiting for work");
    try {
        // Handed to the compaction job (see createCompactionJob below) and read back after the
        // job thread finishes; presumably the job stores its failure here - confirm in
        // createCompactionJob.
        final AtomicReference<Throwable> err = new AtomicReference<>();
        while (!shutdown) {
            // Reset all per-job state at the top of every iteration.
            currentCompactionId.set(null);
            err.set(null);
            JOB_HOLDER.reset();
            TExternalCompactionJob job;
            try {
                // NOTE(review): currentCompactionId was just set to null above and is
                // dereferenced below; presumably getNextId()/getNextJob() populates it before
                // returning - confirm, otherwise the get().toString() below would NPE.
                job = getNextJob(getNextId());
                if (!job.isSetExternalCompactionId()) {
                    // A job without an external compaction id is treated as "nothing queued":
                    // wait, then poll again.
                    LOG.trace("No external compactions in queue {}", this.queueName);
                    UtilWaitThread.sleep(getWaitTimeBetweenCompactionChecks());
                    continue;
                }
                // Sanity check: the returned job must carry the id that is currently recorded
                // in currentCompactionId.
                if (!job.getExternalCompactionId().equals(currentCompactionId.get().toString())) {
                    throw new IllegalStateException("Returned eci " + job.getExternalCompactionId() + " does not match supplied eci " + currentCompactionId.get());
                }
            } catch (RetriesExceededException e2) {
                // Not fatal: go straight back to the top of the loop (no sleep at this level).
                LOG.warn("Retries exceeded getting next job. Retrying...");
                continue;
            }
            LOG.debug("Received next compaction job: {}", job);
            // The adders and latches are all passed to the compaction job; this thread reads the
            // adders after 'started' is released and polls 'stopped' to detect completion.
            final LongAdder totalInputEntries = new LongAdder();
            final LongAdder totalInputBytes = new LongAdder();
            final CountDownLatch started = new CountDownLatch(1);
            final CountDownLatch stopped = new CountDownLatch(1);
            final Thread compactionThread = Threads.createThread("Compaction job for tablet " + job.getExtent().toString(), createCompactionJob(job, totalInputEntries, totalInputBytes, started, stopped, err));
            JOB_HOLDER.set(job, compactionThread);
            try {
                // start the compactionThread
                compactionThread.start();
                // wait until the compactor is started
                started.await();
                // Totals are sampled once here and reused for every progress report below.
                final long inputEntries = totalInputEntries.sum();
                final long waitTime = calculateProgressCheckTime(totalInputBytes.sum());
                LOG.debug("Progress checks will occur every {} seconds", waitTime);
                String percentComplete = "unknown";
                // await(...) returns false on timeout, so the body runs once per waitTime
                // seconds for as long as the job has not counted down 'stopped'.
                while (!stopped.await(waitTime, TimeUnit.SECONDS)) {
                    List<CompactionInfo> running = org.apache.accumulo.server.compaction.FileCompactor.getRunningCompactions();
                    if (!running.isEmpty()) {
                        // Compaction has started. There should only be one in the list
                        CompactionInfo info = running.get(0);
                        if (info != null) {
                            // With zero input entries the percentage keeps its previous value
                            // (initially "unknown"), which also avoids dividing by zero.
                            if (inputEntries > 0) {
                                percentComplete = Float.toString((info.getEntriesRead() / (float) inputEntries) * 100);
                            }
                            // The literal "%" is passed as an argument rather than written in
                            // the format string.
                            String message = String.format("Compaction in progress, read %d of %d input entries ( %s %s ), written %d entries", info.getEntriesRead(), inputEntries, percentComplete, "%", info.getEntriesWritten());
                            watcher.run();
                            try {
                                LOG.debug("Updating coordinator with compaction progress: {}.", message);
                                TCompactionStatusUpdate update = new TCompactionStatusUpdate(TCompactionState.IN_PROGRESS, message, inputEntries, info.getEntriesRead(), info.getEntriesWritten());
                                updateCompactionState(job, update);
                            } catch (RetriesExceededException e) {
                                // A missed progress update is non-fatal: log the message only
                                // (no stack trace) and keep polling.
                                LOG.warn("Error updating coordinator with compaction progress, error: {}", e.getMessage());
                            }
                        }
                    } else {
                        LOG.error("Waiting on compaction thread to finish, but no RUNNING compaction");
                    }
                }
                compactionThread.join();
                LOG.trace("Compaction thread finished.");
                // Run the watcher again to clear out the finished compaction and set the
                // stuck count to zero.
                watcher.run();
                if (err.get() != null) {
                    // maybe the error occurred because the table was deleted or something like that, so
                    // force a cancel check to possibly reduce noise in the logs
                    checkIfCanceled();
                }
                // Final state, in priority order: CANCELLED, then FAILED, then completed.
                // The InterruptedException test uses exact class equality, so subclasses of
                // InterruptedException do not take the cancelled branch.
                if (compactionThread.isInterrupted() || JOB_HOLDER.isCancelled() || (err.get() != null && err.get().getClass().equals(InterruptedException.class))) {
                    LOG.warn("Compaction thread was interrupted, sending CANCELLED state");
                    try {
                        // The -1 counts are presumably "unknown" placeholders - confirm against
                        // TCompactionStatusUpdate.
                        TCompactionStatusUpdate update = new TCompactionStatusUpdate(TCompactionState.CANCELLED, "Compaction cancelled", -1, -1, -1);
                        updateCompactionState(job, update);
                        updateCompactionFailed(job);
                    } catch (RetriesExceededException e) {
                        LOG.error("Error updating coordinator with compaction cancellation.", e);
                    } finally {
                        currentCompactionId.set(null);
                    }
                } else if (err.get() != null) {
                    try {
                        LOG.info("Updating coordinator with compaction failure.");
                        TCompactionStatusUpdate update = new TCompactionStatusUpdate(TCompactionState.FAILED, "Compaction failed due to: " + err.get().getMessage(), -1, -1, -1);
                        updateCompactionState(job, update);
                        updateCompactionFailed(job);
                    } catch (RetriesExceededException e) {
                        LOG.error("Error updating coordinator with compaction failure.", e);
                    } finally {
                        currentCompactionId.set(null);
                    }
                } else {
                    try {
                        LOG.trace("Updating coordinator with compaction completion.");
                        updateCompactionCompleted(job, JOB_HOLDER.getStats());
                    } catch (RetriesExceededException e) {
                        // The completion could not be reported, so the job is cancelled instead.
                        LOG.error("Error updating coordinator with compaction completion, cancelling compaction.", e);
                        try {
                            cancel(job.getExternalCompactionId());
                        } catch (TException e1) {
                            LOG.error("Error cancelling compaction.", e1);
                        }
                    } finally {
                        currentCompactionId.set(null);
                    }
                }
            } catch (RuntimeException e1) {
                // NOTE(review): only unchecked exceptions are handled here. The checked
                // InterruptedException thrown by await()/join() above is not caught by this
                // clause; it runs the finally block below and then reaches the outer
                // catch (Exception), which ends the work loop - confirm this matches the intent
                // suggested by the log message.
                LOG.error("Compactor thread was interrupted waiting for compaction to start, cancelling job", e1);
                try {
                    cancel(job.getExternalCompactionId());
                } catch (TException e2) {
                    LOG.error("Error cancelling compaction.", e2);
                }
            } finally {
                currentCompactionId.set(null);
                // avoid multiple threads updating shared state.
                // Keep interrupting (and waiting up to one second per attempt) until the job
                // thread has actually terminated, so the next iteration never overlaps with it.
                while (compactionThread.isAlive()) {
                    compactionThread.interrupt();
                    compactionThread.join(1000);
                }
            }
        }
    } catch (Exception e) {
        // Anything escaping the work loop is logged and not rethrown; cleanup follows.
        LOG.error("Unhandled error occurred in Compactor", e);
    } finally {
        // Shutdown local thrift server
        LOG.info("Stopping Thrift Servers");
        if (compactorAddress.server != null) {
            compactorAddress.server.stop();
        }
        // Closing the volume manager is best-effort; a failure is logged and cleanup continues.
        try {
            LOG.debug("Closing filesystems");
            VolumeManager mgr = getContext().getVolumeManager();
            if (null != mgr) {
                mgr.close();
            }
        } catch (IOException e) {
            LOG.warn("Failed to close filesystem : {}", e.getMessage(), e);
        }
        gcLogger.logGCInfo(getConfiguration());
        LOG.info("stop requested. exiting ... ");
        // Release the compactor lock last; a failure to unlock is logged, not propagated.
        try {
            if (null != compactorLock) {
                compactorLock.unlock();
            }
        } catch (Exception e) {
            LOG.warn("Failed to release compactor lock", e);
        }
    }
}
Also used : TException(org.apache.thrift.TException) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) UnknownHostException(java.net.UnknownHostException) TCompactionStatusUpdate(org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) TTransportException(org.apache.thrift.transport.TTransportException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ThriftSecurityException(org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException) RetriesExceededException(org.apache.accumulo.server.compaction.RetryableThriftCall.RetriesExceededException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) TException(org.apache.thrift.TException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ThriftTableOperationException(org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException) UnknownCompactionIdException(org.apache.accumulo.core.compaction.thrift.UnknownCompactionIdException) UtilWaitThread(org.apache.accumulo.fate.util.UtilWaitThread) HostAndPort(org.apache.accumulo.core.util.HostAndPort) LongAdder(java.util.concurrent.atomic.LongAdder) RetriesExceededException(org.apache.accumulo.server.compaction.RetryableThriftCall.RetriesExceededException) CompactionInfo(org.apache.accumulo.server.compaction.CompactionInfo) TExternalCompactionJob(org.apache.accumulo.core.tabletserver.thrift.TExternalCompactionJob) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

IOException (java.io.IOException)1 UnknownHostException (java.net.UnknownHostException)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 LongAdder (java.util.concurrent.atomic.LongAdder)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 ThriftSecurityException (org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException)1 ThriftTableOperationException (org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException)1 TCompactionStatusUpdate (org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate)1 UnknownCompactionIdException (org.apache.accumulo.core.compaction.thrift.UnknownCompactionIdException)1 TExternalCompactionJob (org.apache.accumulo.core.tabletserver.thrift.TExternalCompactionJob)1 HostAndPort (org.apache.accumulo.core.util.HostAndPort)1 UtilWaitThread (org.apache.accumulo.fate.util.UtilWaitThread)1 CompactionInfo (org.apache.accumulo.server.compaction.CompactionInfo)1 RetriesExceededException (org.apache.accumulo.server.compaction.RetryableThriftCall.RetriesExceededException)1 VolumeManager (org.apache.accumulo.server.fs.VolumeManager)1 TException (org.apache.thrift.TException)1 TTransportException (org.apache.thrift.transport.TTransportException)1 KeeperException (org.apache.zookeeper.KeeperException)1