Use of org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate in project accumulo by Apache.
The class ExternalDoNothingCompactor, method createCompactionJob.
@Override
protected Runnable createCompactionJob(TExternalCompactionJob job, LongAdder totalInputEntries,
    LongAdder totalInputBytes, CountDownLatch started, CountDownLatch stopped,
    AtomicReference<Throwable> err) {
  // Set this to true so that only 1 external compaction is run
  this.shutdown = true;
  return new Runnable() {
    @Override
    public void run() {
      try {
        LOG.info("Starting up compaction runnable for job: {}", job);
        TCompactionStatusUpdate update = new TCompactionStatusUpdate();
        update.setState(TCompactionState.STARTED);
        update.setMessage("Compaction started");
        updateCompactionState(job, update);
        LOG.info("Starting compactor");
        started.countDown();
        while (!JOB_HOLDER.isCancelled()) {
          LOG.info("Sleeping while job is not cancelled");
          UtilWaitThread.sleep(1000);
        }
        // Compactor throws this exception when cancelled
        throw new CompactionCanceledException();
      } catch (Exception e) {
        LOG.error("Compaction failed", e);
        err.set(e);
      } finally {
        stopped.countDown();
      }
    }
  };
}
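This test compactor builds its status update with the generated bean-style setters. Below is a minimal standalone sketch of that construction style, assuming only the accumulo-core Thrift classes on the classpath; the class name StatusUpdateExample is hypothetical and not part of the Accumulo code base.

import org.apache.accumulo.core.compaction.thrift.TCompactionState;
import org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate;

// Hypothetical standalone example; not part of the Accumulo code base.
public class StatusUpdateExample {
  public static void main(String[] args) {
    // Bean-style construction, as in ExternalDoNothingCompactor above
    TCompactionStatusUpdate update = new TCompactionStatusUpdate();
    update.setState(TCompactionState.STARTED);
    update.setMessage("Compaction started");
    System.out.println(update);
  }
}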
Use of org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate in project accumulo by Apache.
The class CompactionCoordinator, method run.
@Override
public void run() {
  ServerAddress coordinatorAddress = null;
  try {
    coordinatorAddress = startCoordinatorClientService();
  } catch (UnknownHostException e1) {
    throw new RuntimeException("Failed to start the coordinator service", e1);
  }
  final HostAndPort clientAddress = coordinatorAddress.address;
  try {
    getCoordinatorLock(clientAddress);
  } catch (KeeperException | InterruptedException e) {
    throw new IllegalStateException("Exception getting Coordinator lock", e);
  }
  try {
    MetricsUtil.initializeMetrics(getContext().getConfiguration(), this.applicationName,
        clientAddress);
  } catch (Exception e1) {
    LOG.error("Error initializing metrics, metrics will not be emitted.", e1);
  }
  // On a restart of the coordinator it's possible that external compactions are in progress.
  // Attempt to get the running compactions on the compactors and then resolve which tserver
  // the external compaction came from to re-populate the RUNNING collection.
  LOG.info("Checking for running external compactions");
  // On restart, contact the running Compactors to try to seed the list of running compactions
  List<RunningCompaction> running =
      ExternalCompactionUtil.getCompactionsRunningOnCompactors(getContext());
  if (running.isEmpty()) {
    LOG.info("No running external compactions found");
  } else {
    LOG.info("Found {} running external compactions", running.size());
    running.forEach(rc -> {
      TCompactionStatusUpdate update = new TCompactionStatusUpdate();
      update.setState(TCompactionState.IN_PROGRESS);
      update.setMessage("Coordinator restarted, compaction found in progress");
      rc.addUpdate(System.currentTimeMillis(), update);
      RUNNING.put(ExternalCompactionId.of(rc.getJob().getExternalCompactionId()), rc);
    });
  }
  tserverSet.startListeningForTabletServerChanges();
  startDeadCompactionDetector();
  LOG.info("Starting loop to check tservers for compaction summaries");
  while (!shutdown) {
    long start = System.currentTimeMillis();
    updateSummaries();
    long now = System.currentTimeMillis();
    TIME_COMPACTOR_LAST_CHECKED.forEach((k, v) -> {
      if ((now - v) > getMissingCompactorWarningTime()) {
        LOG.warn("No compactors have checked in with coordinator for queue {} in {}ms", k,
            getMissingCompactorWarningTime());
      }
    });
    long checkInterval = getTServerCheckInterval();
    long duration = System.currentTimeMillis() - start;
    if (checkInterval - duration > 0) {
      LOG.debug("Waiting {}ms for next tserver check", checkInterval - duration);
      UtilWaitThread.sleep(checkInterval - duration);
    }
  }
  LOG.info("Shutting down");
}
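The summary-check loop above sleeps only for whatever remains of the check interval after updateSummaries() has run, so checks stay roughly evenly spaced regardless of how long the work takes. A self-contained sketch of that pattern follows; the interval and the simulated work are placeholder values, not coordinator API.

// Hypothetical standalone sketch of the fixed-interval polling pattern used above.
public class FixedIntervalLoopExample {
  public static void main(String[] args) throws InterruptedException {
    final long checkIntervalMs = 1000; // placeholder for getTServerCheckInterval()
    for (int i = 0; i < 3; i++) {
      long start = System.currentTimeMillis();
      Thread.sleep(200); // stand-in for the real work, e.g. updateSummaries()
      long duration = System.currentTimeMillis() - start;
      if (checkIntervalMs - duration > 0) {
        // sleep only for the remainder of the interval
        Thread.sleep(checkIntervalMs - duration);
      }
    }
  }
}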
Use of org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate in project accumulo by Apache.
The class Compactor, method createCompactionJob.
/**
 * Create compaction runnable
 *
 * @param job compaction job
 * @param totalInputEntries object to capture total entries
 * @param totalInputBytes object to capture input file size
 * @param started started latch
 * @param stopped stopped latch
 * @param err reference to error
 * @return Runnable compaction job
 */
protected Runnable createCompactionJob(final TExternalCompactionJob job,
    final LongAdder totalInputEntries, final LongAdder totalInputBytes,
    final CountDownLatch started, final CountDownLatch stopped,
    final AtomicReference<Throwable> err) {
  return new Runnable() {
    @Override
    public void run() {
      // It's only expected that a single compaction runs at a time. Multiple compactions running
      // at a time could cause odd behavior like out-of-order and unexpected thrift calls to the
      // coordinator. This is a sanity check to ensure the expectation is met. Should this check
      // ever fail, it means there is a bug elsewhere.
      Preconditions.checkState(compactionRunning.compareAndSet(false, true));
      try {
        LOG.info("Starting up compaction runnable for job: {}", job);
        TCompactionStatusUpdate update = new TCompactionStatusUpdate(TCompactionState.STARTED,
            "Compaction started", -1, -1, -1);
        updateCompactionState(job, update);
        final AccumuloConfiguration tConfig;
        var extent = KeyExtent.fromThrift(job.getExtent());
        if (!job.getOverrides().isEmpty()) {
          tConfig = new ConfigurationCopy(getContext().getTableConfiguration(extent.tableId()));
          job.getOverrides().forEach((k, v) -> ((ConfigurationCopy) tConfig).set(k, v));
          LOG.debug("Overriding table properties with {}", job.getOverrides());
        } else {
          tConfig = getContext().getTableConfiguration(extent.tableId());
        }
        final TabletFile outputFile = new TabletFile(new Path(job.getOutputFile()));
        final Map<StoredTabletFile, DataFileValue> files = new TreeMap<>();
        job.getFiles().forEach(f -> {
          files.put(new StoredTabletFile(f.getMetadataFileEntry()),
              new DataFileValue(f.getSize(), f.getEntries(), f.getTimestamp()));
          totalInputEntries.add(f.getEntries());
          totalInputBytes.add(f.getSize());
        });
        final List<IteratorSetting> iters = new ArrayList<>();
        job.getIteratorSettings().getIterators()
            .forEach(tis -> iters.add(SystemIteratorUtil.toIteratorSetting(tis)));
        ExtCEnv cenv = new ExtCEnv(JOB_HOLDER, queueName);
        FileCompactor compactor = new FileCompactor(getContext(), extent, files, outputFile,
            job.isPropagateDeletes(), cenv, iters, tConfig);
        LOG.trace("Starting compactor");
        started.countDown();
        org.apache.accumulo.server.compaction.CompactionStats stat = compactor.call();
        TCompactionStats cs = new TCompactionStats();
        cs.setEntriesRead(stat.getEntriesRead());
        cs.setEntriesWritten(stat.getEntriesWritten());
        cs.setFileSize(stat.getFileSize());
        JOB_HOLDER.setStats(cs);
        LOG.info("Compaction completed successfully {} ", job.getExternalCompactionId());
        // Update state when completed
        TCompactionStatusUpdate update2 = new TCompactionStatusUpdate(TCompactionState.SUCCEEDED,
            "Compaction completed successfully", -1, -1, -1);
        updateCompactionState(job, update2);
      } catch (Exception e) {
        LOG.error("Compaction failed", e);
        err.set(e);
      } finally {
        stopped.countDown();
        Preconditions.checkState(compactionRunning.compareAndSet(true, false));
      }
    }
  };
}
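Unlike the first two examples, this method uses the generated all-args constructor. Judging from the calls in this section, the positional fields appear to be state, message, entriesToBeCompacted, entriesRead, and entriesWritten, with -1 as a "not yet known" sentinel; that field order is inferred from these call sites, not confirmed here. A hedged sketch, again assuming accumulo-core on the classpath:

import org.apache.accumulo.core.compaction.thrift.TCompactionState;
import org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate;

// Hypothetical standalone example; field order inferred from the calls above.
public class AllArgsUpdateExample {
  public static void main(String[] args) {
    // state, message, entriesToBeCompacted, entriesRead, entriesWritten;
    // entry counts are unknown at start time, so -1 is used as a sentinel
    TCompactionStatusUpdate update = new TCompactionStatusUpdate(TCompactionState.STARTED,
        "Compaction started", -1, -1, -1);
    System.out.println(update);
  }
}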
Use of org.apache.accumulo.core.compaction.thrift.TCompactionStatusUpdate in project accumulo by Apache.
The class Compactor, method run.
@Override
public void run() {
  try {
    compactorAddress = startCompactorClientService();
  } catch (UnknownHostException e1) {
    throw new RuntimeException("Failed to start the compactor client service", e1);
  }
  final HostAndPort clientAddress = compactorAddress.getAddress();
  try {
    announceExistence(clientAddress);
  } catch (KeeperException | InterruptedException e) {
    throw new RuntimeException("Error registering compactor in ZooKeeper", e);
  }
  try {
    MetricsUtil.initializeMetrics(getContext().getConfiguration(), this.applicationName,
        clientAddress);
  } catch (Exception e1) {
    LOG.error("Error initializing metrics, metrics will not be emitted.", e1);
  }
  MetricsUtil.initializeProducers(this);
  LOG.info("Compactor started, waiting for work");
  try {
    final AtomicReference<Throwable> err = new AtomicReference<>();
    while (!shutdown) {
      currentCompactionId.set(null);
      err.set(null);
      JOB_HOLDER.reset();
      TExternalCompactionJob job;
      try {
        job = getNextJob(getNextId());
        if (!job.isSetExternalCompactionId()) {
          LOG.trace("No external compactions in queue {}", this.queueName);
          UtilWaitThread.sleep(getWaitTimeBetweenCompactionChecks());
          continue;
        }
        if (!job.getExternalCompactionId().equals(currentCompactionId.get().toString())) {
          throw new IllegalStateException("Returned eci " + job.getExternalCompactionId()
              + " does not match supplied eci " + currentCompactionId.get());
        }
      } catch (RetriesExceededException e2) {
        LOG.warn("Retries exceeded getting next job. Retrying...");
        continue;
      }
      LOG.debug("Received next compaction job: {}", job);
      final LongAdder totalInputEntries = new LongAdder();
      final LongAdder totalInputBytes = new LongAdder();
      final CountDownLatch started = new CountDownLatch(1);
      final CountDownLatch stopped = new CountDownLatch(1);
      final Thread compactionThread =
          Threads.createThread("Compaction job for tablet " + job.getExtent().toString(),
              createCompactionJob(job, totalInputEntries, totalInputBytes, started, stopped, err));
      JOB_HOLDER.set(job, compactionThread);
      try {
        // start the compactionThread
        compactionThread.start();
        // wait until the compactor is started
        started.await();
        final long inputEntries = totalInputEntries.sum();
        final long waitTime = calculateProgressCheckTime(totalInputBytes.sum());
        LOG.debug("Progress checks will occur every {} seconds", waitTime);
        String percentComplete = "unknown";
        while (!stopped.await(waitTime, TimeUnit.SECONDS)) {
          List<CompactionInfo> running =
              org.apache.accumulo.server.compaction.FileCompactor.getRunningCompactions();
          if (!running.isEmpty()) {
            // Compaction has started. There should only be one in the list
            CompactionInfo info = running.get(0);
            if (info != null) {
              if (inputEntries > 0) {
                percentComplete =
                    Float.toString((info.getEntriesRead() / (float) inputEntries) * 100);
              }
              String message = String.format(
                  "Compaction in progress, read %d of %d input entries ( %s %s ), written %d entries",
                  info.getEntriesRead(), inputEntries, percentComplete, "%",
                  info.getEntriesWritten());
              watcher.run();
              try {
                LOG.debug("Updating coordinator with compaction progress: {}.", message);
                TCompactionStatusUpdate update =
                    new TCompactionStatusUpdate(TCompactionState.IN_PROGRESS, message,
                        inputEntries, info.getEntriesRead(), info.getEntriesWritten());
                updateCompactionState(job, update);
              } catch (RetriesExceededException e) {
                LOG.warn("Error updating coordinator with compaction progress, error: {}",
                    e.getMessage());
              }
            }
          } else {
            LOG.error("Waiting on compaction thread to finish, but no RUNNING compaction");
          }
        }
        compactionThread.join();
        LOG.trace("Compaction thread finished.");
        // Run the watcher again to clear out the finished compaction and set the
        // stuck count to zero.
        watcher.run();
        if (err.get() != null) {
          // maybe the error occurred because the table was deleted or something like that, so
          // force a cancel check to possibly reduce noise in the logs
          checkIfCanceled();
        }
        if (compactionThread.isInterrupted() || JOB_HOLDER.isCancelled()
            || (err.get() != null && err.get().getClass().equals(InterruptedException.class))) {
          LOG.warn("Compaction thread was interrupted, sending CANCELLED state");
          try {
            TCompactionStatusUpdate update = new TCompactionStatusUpdate(
                TCompactionState.CANCELLED, "Compaction cancelled", -1, -1, -1);
            updateCompactionState(job, update);
            updateCompactionFailed(job);
          } catch (RetriesExceededException e) {
            LOG.error("Error updating coordinator with compaction cancellation.", e);
          } finally {
            currentCompactionId.set(null);
          }
        } else if (err.get() != null) {
          try {
            LOG.info("Updating coordinator with compaction failure.");
            TCompactionStatusUpdate update = new TCompactionStatusUpdate(TCompactionState.FAILED,
                "Compaction failed due to: " + err.get().getMessage(), -1, -1, -1);
            updateCompactionState(job, update);
            updateCompactionFailed(job);
          } catch (RetriesExceededException e) {
            LOG.error("Error updating coordinator with compaction failure.", e);
          } finally {
            currentCompactionId.set(null);
          }
        } else {
          try {
            LOG.trace("Updating coordinator with compaction completion.");
            updateCompactionCompleted(job, JOB_HOLDER.getStats());
          } catch (RetriesExceededException e) {
            LOG.error("Error updating coordinator with compaction completion, cancelling compaction.",
                e);
            try {
              cancel(job.getExternalCompactionId());
            } catch (TException e1) {
              LOG.error("Error cancelling compaction.", e1);
            }
          } finally {
            currentCompactionId.set(null);
          }
        }
      } catch (RuntimeException e1) {
        LOG.error("Compactor thread was interrupted waiting for compaction to start, cancelling job",
            e1);
        try {
          cancel(job.getExternalCompactionId());
        } catch (TException e2) {
          LOG.error("Error cancelling compaction.", e2);
        }
      } finally {
        currentCompactionId.set(null);
        // avoid multiple threads updating shared state
        while (compactionThread.isAlive()) {
          compactionThread.interrupt();
          compactionThread.join(1000);
        }
      }
    }
  } catch (Exception e) {
    LOG.error("Unhandled error occurred in Compactor", e);
  } finally {
    // Shutdown local thrift server
    LOG.info("Stopping Thrift Servers");
    if (compactorAddress.server != null) {
      compactorAddress.server.stop();
    }
    try {
      LOG.debug("Closing filesystems");
      VolumeManager mgr = getContext().getVolumeManager();
      if (null != mgr) {
        mgr.close();
      }
    } catch (IOException e) {
      LOG.warn("Failed to close filesystem : {}", e.getMessage(), e);
    }
    gcLogger.logGCInfo(getConfiguration());
    LOG.info("stop requested. exiting ... ");
    try {
      if (null != compactorLock) {
        compactorLock.unlock();
      }
    } catch (Exception e) {
      LOG.warn("Failed to release compactor lock", e);
    }
  }
}
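The progress message in the loop above divides entries read by the job's total input entries, guarding the division when the total is unknown or zero. A self-contained sketch of that calculation with placeholder values:

// Hypothetical standalone example of the percent-complete calculation above.
public class ProgressMessageExample {
  public static void main(String[] args) {
    long inputEntries = 50_000; // placeholder for totalInputEntries.sum()
    long entriesRead = 12_500; // placeholder for info.getEntriesRead()
    long entriesWritten = 9_000; // placeholder for info.getEntriesWritten()
    String percentComplete = "unknown";
    if (inputEntries > 0) { // avoid division by zero when the total is unknown
      percentComplete = Float.toString((entriesRead / (float) inputEntries) * 100);
    }
    String message = String.format(
        "Compaction in progress, read %d of %d input entries ( %s %s ), written %d entries",
        entriesRead, inputEntries, percentComplete, "%", entriesWritten);
    System.out.println(message);
  }
}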