Use of org.apache.accumulo.gc.metrics.GcMetrics in the Apache Accumulo project.
Class SimpleGarbageCollector, method run.
/**
 * Main loop of the garbage collector process.
 *
 * <p>Startup sequence: calls {@code startStatsService()}, acquires the ZooKeeper lock (the
 * process exits with status 1 if that throws), attempts to initialize metrics (a failure is
 * logged and otherwise ignored), then sleeps for the configured start delay.
 *
 * <p>Each cycle then:
 * <ol>
 * <li>runs {@code GarbageCollectionAlgorithm} for the ROOT, METADATA and USER data levels and
 * publishes the cycle statistics;</li>
 * <li>runs {@code CloseWriteAheadLogReferences} for the replication table;</li>
 * <li>runs {@code GarbageCollectWriteAheadLogs};</li>
 * <li>performs the post action selected by {@code Property.GC_USE_FULL_COMPACTION}
 * ("compact", "flush", or nothing) on the metadata and root tables;</li>
 * <li>sleeps for {@code Property.GC_CYCLE_DELAY}.</li>
 * </ol>
 *
 * <p>Exceptions raised inside steps 1-4 are logged and recorded on the relevant trace span; they
 * do not end the loop. The method returns only when the thread is interrupted during one of the
 * sleeps, in which case the interrupt status is restored before returning so that callers can
 * observe it.
 */
@Override
@SuppressFBWarnings(value = "DM_EXIT", justification = "main class can call System.exit")
public void run() {
  final VolumeManager fs = getContext().getVolumeManager();

  log.info("Trying to acquire ZooKeeper lock for garbage collector");
  HostAndPort address = startStatsService();
  try {
    getZooLock(address);
  } catch (Exception ex) {
    // Without the lock this process must not collect anything: give up entirely.
    log.error("{}", ex.getMessage(), ex);
    System.exit(1);
  }

  try {
    MetricsUtil.initializeMetrics(getContext().getConfiguration(), this.applicationName, address);
    MetricsUtil.initializeProducers(new GcMetrics(this));
  } catch (Exception e1) {
    // Metrics are best-effort; collection proceeds without them.
    log.error("Error initializing metrics, metrics will not be emitted.", e1);
  }

  // Sleep for an initial period, giving the manager time to start up and
  // old data files to be unused
  try {
    long delay = getStartDelay();
    log.debug("Sleeping for {} milliseconds before beginning garbage collection cycles", delay);
    Thread.sleep(delay);
  } catch (InterruptedException e) {
    log.warn("{}", e.getMessage(), e);
    // Thread.sleep cleared the interrupt flag when it threw; restore it for the caller.
    Thread.currentThread().interrupt();
    return;
  }

  // This is created outside of the run loop and passed to the walogCollector so that
  // only a single timed task is created (internal to LiveTServerSet) using SimpleTimer.
  final LiveTServerSet liveTServerSet = new LiveTServerSet(getContext(), (current, deleted, added) -> {
    log.debug("Number of current servers {}, tservers added {}, removed {}", current == null ? -1 : current.size(), added, deleted);
    if (log.isTraceEnabled()) {
      log.trace("Current servers: {}\nAdded: {}\n Removed: {}", current, added, deleted);
    }
  });

  while (true) {
    Span outerSpan = TraceUtil.startSpan(this.getClass(), "gc");
    try (Scope outerScope = outerSpan.makeCurrent()) {
      Span innerSpan = TraceUtil.startSpan(this.getClass(), "loop");
      try (Scope innerScope = innerSpan.makeCurrent()) {
        final long tStart = System.nanoTime();
        try {
          // make room
          System.gc();
          status.current.started = System.currentTimeMillis();

          // Collect each data level in turn: root, then metadata, then user tables.
          new GarbageCollectionAlgorithm().collect(new GCEnv(DataLevel.ROOT));
          new GarbageCollectionAlgorithm().collect(new GCEnv(DataLevel.METADATA));
          new GarbageCollectionAlgorithm().collect(new GCEnv(DataLevel.USER));

          log.info("Number of data file candidates for deletion: {}", status.current.candidates);
          log.info("Number of data file candidates still in use: {}", status.current.inUse);
          log.info("Number of successfully deleted data files: {}", status.current.deleted);
          log.info("Number of data files delete failures: {}", status.current.errors);

          // Publish the finished cycle and start a fresh stats object for the next one.
          status.current.finished = System.currentTimeMillis();
          status.last = status.current;
          gcCycleMetrics.setLastCollect(status.current);
          status.current = new GcCycleStats();
        } catch (Exception e) {
          // A failed cycle is logged and traced; the WAL steps below still run.
          TraceUtil.setException(innerSpan, e, false);
          log.error("{}", e.getMessage(), e);
        }
        final long tStop = System.nanoTime();
        log.info(String.format("Collect cycle took %.2f seconds", (TimeUnit.NANOSECONDS.toMillis(tStop - tStart) / 1000.0)));

        /*
         * We want to prune references to fully-replicated WALs from the replication table which
         * are no longer referenced in the metadata table before running
         * GarbageCollectWriteAheadLogs to ensure we delete as many files as possible.
         */
        Span replSpan = TraceUtil.startSpan(this.getClass(), "replicationClose");
        try (Scope replScope = replSpan.makeCurrent()) {
          @SuppressWarnings("deprecation") Runnable closeWals = new org.apache.accumulo.gc.replication.CloseWriteAheadLogReferences(getContext());
          closeWals.run();
        } catch (Exception e) {
          TraceUtil.setException(replSpan, e, false);
          log.error("Error trying to close write-ahead logs for replication table", e);
        } finally {
          replSpan.end();
        }

        // Clean up any unused write-ahead logs
        Span walSpan = TraceUtil.startSpan(this.getClass(), "walogs");
        try (Scope walScope = walSpan.makeCurrent()) {
          GarbageCollectWriteAheadLogs walogCollector = new GarbageCollectWriteAheadLogs(getContext(), fs, liveTServerSet, isUsingTrash());
          log.info("Beginning garbage collection of write-ahead logs");
          walogCollector.collect(status);
          gcCycleMetrics.setLastWalCollect(status.lastLog);
        } catch (Exception e) {
          TraceUtil.setException(walSpan, e, false);
          log.error("{}", e.getMessage(), e);
        } finally {
          walSpan.end();
        }
      } catch (Exception e) {
        // Anything reaching here was not handled by the per-step catches: record it and propagate.
        TraceUtil.setException(innerSpan, e, true);
        throw e;
      } finally {
        innerSpan.end();
      }

      // we just made a lot of metadata changes: flush them out
      try {
        AccumuloClient accumuloClient = getContext();
        final long actionStart = System.nanoTime();
        String action = getConfiguration().get(Property.GC_USE_FULL_COMPACTION);
        log.debug("gc post action {} started", action);
        switch (action) {
          case "compact":
            accumuloClient.tableOperations().compact(MetadataTable.NAME, null, null, true, true);
            accumuloClient.tableOperations().compact(RootTable.NAME, null, null, true, true);
            break;
          case "flush":
            accumuloClient.tableOperations().flush(MetadataTable.NAME, null, null, true);
            accumuloClient.tableOperations().flush(RootTable.NAME, null, null, true);
            break;
          default:
            log.trace("'none - no action' or invalid value provided: {}", action);
        }
        final long actionComplete = System.nanoTime();
        gcCycleMetrics.setPostOpDurationNanos(actionComplete - actionStart);
        log.info("gc post action {} completed in {} seconds", action, String.format("%.2f", (TimeUnit.NANOSECONDS.toMillis(actionComplete - actionStart) / 1000.0)));
      } catch (Exception e) {
        // The post action is best-effort; a failure is only a warning.
        TraceUtil.setException(outerSpan, e, false);
        log.warn("{}", e.getMessage(), e);
      }
    } catch (Exception e) {
      TraceUtil.setException(outerSpan, e, true);
      throw e;
    } finally {
      outerSpan.end();
    }

    try {
      gcCycleMetrics.incrementRunCycleCount();
      long gcDelay = getConfiguration().getTimeInMillis(Property.GC_CYCLE_DELAY);
      log.debug("Sleeping for {} milliseconds", gcDelay);
      Thread.sleep(gcDelay);
    } catch (InterruptedException e) {
      log.warn("{}", e.getMessage(), e);
      // Thread.sleep cleared the interrupt flag when it threw; restore it for the caller.
      Thread.currentThread().interrupt();
      return;
    }
  }
}
Aggregations