use of org.elasticsearch.common.unit.ByteSizeValue in project elasticsearch by elastic.
the class ElasticsearchConcurrentMergeScheduler method doMerge.
@Override
protected void doMerge(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException {
int totalNumDocs = merge.totalNumDocs();
long totalSizeInBytes = merge.totalBytesSize();
long timeNS = System.nanoTime();
currentMerges.inc();
currentMergesNumDocs.inc(totalNumDocs);
currentMergesSizeInBytes.inc(totalSizeInBytes);
OnGoingMerge onGoingMerge = new OnGoingMerge(merge);
onGoingMerges.add(onGoingMerge);
if (logger.isTraceEnabled()) {
logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", OneMergeHelper.getSegmentName(merge), merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes));
}
try {
beforeMerge(onGoingMerge);
super.doMerge(writer, merge);
} finally {
long tookMS = TimeValue.nsecToMSec(System.nanoTime() - timeNS);
onGoingMerges.remove(onGoingMerge);
afterMerge(onGoingMerge);
currentMerges.dec();
currentMergesNumDocs.dec(totalNumDocs);
currentMergesSizeInBytes.dec(totalSizeInBytes);
totalMergesNumDocs.inc(totalNumDocs);
totalMergesSizeInBytes.inc(totalSizeInBytes);
totalMerges.inc(tookMS);
long stoppedMS = TimeValue.nsecToMSec(merge.rateLimiter.getTotalStoppedNS());
long throttledMS = TimeValue.nsecToMSec(merge.rateLimiter.getTotalPausedNS());
totalMergeStoppedTime.inc(stoppedMS);
totalMergeThrottledTime.inc(throttledMS);
String message = String.format(Locale.ROOT, "merge segment [%s] done: took [%s], [%,.1f MB], [%,d docs], [%s stopped], [%s throttled], [%,.1f MB written], [%,.1f MB/sec throttle]", OneMergeHelper.getSegmentName(merge), TimeValue.timeValueMillis(tookMS), totalSizeInBytes / 1024f / 1024f, totalNumDocs, TimeValue.timeValueMillis(stoppedMS), TimeValue.timeValueMillis(throttledMS), merge.rateLimiter.getTotalBytesWritten() / 1024f / 1024f, merge.rateLimiter.getMBPerSec());
if (tookMS > 20000) {
// if more than 20 seconds, DEBUG log it
logger.debug("{}", message);
} else if (logger.isTraceEnabled()) {
logger.trace("{}", message);
}
}
}
use of org.elasticsearch.common.unit.ByteSizeValue in project elasticsearch by elastic.
the class InternalEngine method writeIndexingBuffer.
@Override
public void writeIndexingBuffer() throws EngineException {
// since it flushes the index as well (though, in terms of concurrency, we are allowed to do it)
try (ReleasableLock lock = readLock.acquire()) {
ensureOpen();
// TODO: it's not great that we secretly tie searcher visibility to "freeing up heap" here... really we should keep two
// searcher managers, one for searching which is only refreshed by the schedule the user requested (refresh_interval, or invoking
// refresh API), and another for version map interactions. See #15768.
final long versionMapBytes = versionMap.ramBytesUsedForRefresh();
final long indexingBufferBytes = indexWriter.ramBytesUsed();
final boolean useRefresh = versionMapRefreshPending.get() || (indexingBufferBytes / 4 < versionMapBytes);
if (useRefresh) {
// The version map is using > 25% of the indexing buffer, so we do a refresh so the version map also clears
logger.debug("use refresh to write indexing buffer (heap size=[{}]), to also clear version map (heap size=[{}])", new ByteSizeValue(indexingBufferBytes), new ByteSizeValue(versionMapBytes));
refresh("write indexing buffer");
} else {
// Most of our heap is used by the indexing buffer, so we do a cheaper (just writes segments, doesn't open a new searcher) IW.flush:
logger.debug("use IndexWriter.flush to write indexing buffer (heap size=[{}]) since version map is small (heap size=[{}])", new ByteSizeValue(indexingBufferBytes), new ByteSizeValue(versionMapBytes));
indexWriter.flush();
}
} catch (AlreadyClosedException e) {
failOnTragicEvent(e);
throw e;
} catch (Exception e) {
try {
failEngine("writeIndexingBuffer failed", e);
} catch (Exception inner) {
e.addSuppressed(inner);
}
throw new RefreshFailedEngineException(shardId, e);
}
}
use of org.elasticsearch.common.unit.ByteSizeValue in project elasticsearch by elastic.
the class PeerRecoveryTargetService method doRecovery.
private void doRecovery(final long recoveryId) {
final StartRecoveryRequest request;
final CancellableThreads cancellableThreads;
final RecoveryState.Timer timer;
try (RecoveryRef recoveryRef = onGoingRecoveries.getRecovery(recoveryId)) {
if (recoveryRef == null) {
logger.trace("not running recovery with id [{}] - can not find it (probably finished)", recoveryId);
return;
}
final RecoveryTarget recoveryTarget = recoveryRef.target();
cancellableThreads = recoveryTarget.cancellableThreads();
timer = recoveryTarget.state().getTimer();
try {
assert recoveryTarget.sourceNode() != null : "can not do a recovery without a source node";
request = getStartRecoveryRequest(recoveryTarget);
logger.trace("{} preparing shard for peer recovery", recoveryTarget.shardId());
recoveryTarget.indexShard().prepareForIndexRecovery();
} catch (final Exception e) {
// this will be logged as warning later on...
logger.trace("unexpected error while preparing shard for peer recovery, failing recovery", e);
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(recoveryTarget.state(), "failed to prepare shard for recovery", e), true);
return;
}
}
try {
logger.trace("{} starting recovery from {}", request.shardId(), request.sourceNode());
final AtomicReference<RecoveryResponse> responseHolder = new AtomicReference<>();
cancellableThreads.execute(() -> responseHolder.set(transportService.submitRequest(request.sourceNode(), PeerRecoverySourceService.Actions.START_RECOVERY, request, new FutureTransportResponseHandler<RecoveryResponse>() {
@Override
public RecoveryResponse newInstance() {
return new RecoveryResponse();
}
}).txGet()));
final RecoveryResponse recoveryResponse = responseHolder.get();
final TimeValue recoveryTime = new TimeValue(timer.time());
// do this through ongoing recoveries to remove it from the collection
onGoingRecoveries.markRecoveryAsDone(recoveryId);
if (logger.isTraceEnabled()) {
StringBuilder sb = new StringBuilder();
sb.append('[').append(request.shardId().getIndex().getName()).append(']').append('[').append(request.shardId().id()).append("] ");
sb.append("recovery completed from ").append(request.sourceNode()).append(", took[").append(recoveryTime).append("]\n");
sb.append(" phase1: recovered_files [").append(recoveryResponse.phase1FileNames.size()).append("]").append(" with " + "total_size of [").append(new ByteSizeValue(recoveryResponse.phase1TotalSize)).append("]").append(", took [").append(timeValueMillis(recoveryResponse.phase1Time)).append("], throttling_wait [").append(timeValueMillis(recoveryResponse.phase1ThrottlingWaitTime)).append(']').append("\n");
sb.append(" : reusing_files [").append(recoveryResponse.phase1ExistingFileNames.size()).append("] with " + "total_size of [").append(new ByteSizeValue(recoveryResponse.phase1ExistingTotalSize)).append("]\n");
sb.append(" phase2: start took [").append(timeValueMillis(recoveryResponse.startTime)).append("]\n");
sb.append(" : recovered [").append(recoveryResponse.phase2Operations).append("]").append(" transaction log " + "operations").append(", took [").append(timeValueMillis(recoveryResponse.phase2Time)).append("]").append("\n");
logger.trace("{}", sb);
} else {
logger.debug("{} recovery done from [{}], took [{}]", request.shardId(), request.sourceNode(), recoveryTime);
}
} catch (CancellableThreads.ExecutionCancelledException e) {
logger.trace("recovery cancelled", e);
} catch (Exception e) {
if (logger.isTraceEnabled()) {
logger.trace((Supplier<?>) () -> new ParameterizedMessage("[{}][{}] Got exception on recovery", request.shardId().getIndex().getName(), request.shardId().id()), e);
}
Throwable cause = ExceptionsHelper.unwrapCause(e);
if (cause instanceof CancellableThreads.ExecutionCancelledException) {
// this can also come from the source wrapped in a RemoteTransportException
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, "source has canceled the recovery", cause), false);
return;
}
if (cause instanceof RecoveryEngineException) {
// unwrap an exception that was thrown as part of the recovery
cause = cause.getCause();
}
// do it twice, in case we have double transport exception
cause = ExceptionsHelper.unwrapCause(cause);
if (cause instanceof RecoveryEngineException) {
// unwrap an exception that was thrown as part of the recovery
cause = cause.getCause();
}
if (cause instanceof IllegalIndexShardStateException || cause instanceof IndexNotFoundException || cause instanceof ShardNotFoundException) {
// if the target is not ready yet, retry
retryRecovery(recoveryId, "remote shard not ready", recoverySettings.retryDelayStateSync(), recoverySettings.activityTimeout());
return;
}
if (cause instanceof DelayRecoveryException) {
retryRecovery(recoveryId, cause, recoverySettings.retryDelayStateSync(), recoverySettings.activityTimeout());
return;
}
if (cause instanceof ConnectTransportException) {
logger.debug("delaying recovery of {} for [{}] due to networking error [{}]", request.shardId(), recoverySettings.retryDelayNetwork(), cause.getMessage());
retryRecovery(recoveryId, cause.getMessage(), recoverySettings.retryDelayNetwork(), recoverySettings.activityTimeout());
return;
}
if (cause instanceof AlreadyClosedException) {
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, "source shard is closed", cause), false);
return;
}
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, e), true);
}
}
use of org.elasticsearch.common.unit.ByteSizeValue in project elasticsearch by elastic.
the class RecoverySourceHandler method phase1.
/**
* Perform phase1 of the recovery operations. Once this {@link IndexCommit}
* snapshot has been performed no commit operations (files being fsync'd)
* are effectively allowed on this index until all recovery phases are done
* <p>
* Phase1 examines the segment files on the target node and copies over the
* segments that are missing. Only segments that have the same size and
* checksum can be reused
*/
public void phase1(final IndexCommit snapshot, final Translog.View translogView) {
cancellableThreads.checkForCancel();
// Total size of segment files that are recovered
long totalSize = 0;
// Total size of segment files that were able to be re-used
long existingTotalSize = 0;
final Store store = shard.store();
store.incRef();
try {
StopWatch stopWatch = new StopWatch().start();
final Store.MetadataSnapshot recoverySourceMetadata;
try {
recoverySourceMetadata = store.getMetadata(snapshot);
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
shard.failShard("recovery", ex);
throw ex;
}
for (String name : snapshot.getFileNames()) {
final StoreFileMetaData md = recoverySourceMetadata.get(name);
if (md == null) {
logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
}
}
// Generate a "diff" of all the identical, different, and missing
// segment files on the target node, using the existing files on
// the source node
String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
final boolean recoverWithSyncId = recoverySourceSyncId != null && recoverySourceSyncId.equals(recoveryTargetSyncId);
if (recoverWithSyncId) {
final long numDocsTarget = request.metadataSnapshot().getNumDocs();
final long numDocsSource = recoverySourceMetadata.getNumDocs();
if (numDocsTarget != numDocsSource) {
throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number " + "of docs differ: " + numDocsSource + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsTarget + "(" + request.targetNode().getName() + ")");
}
// we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
// so we don't return here
logger.trace("skipping [phase1]- identical sync id [{}] found on both source and target", recoverySourceSyncId);
} else {
final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
for (StoreFileMetaData md : diff.identical) {
response.phase1ExistingFileNames.add(md.name());
response.phase1ExistingFileSizes.add(md.length());
existingTotalSize += md.length();
if (logger.isTraceEnabled()) {
logger.trace("recovery [phase1]: not recovering [{}], exist in local store and has checksum [{}]," + " size [{}]", md.name(), md.checksum(), md.length());
}
totalSize += md.length();
}
List<StoreFileMetaData> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
phase1Files.addAll(diff.different);
phase1Files.addAll(diff.missing);
for (StoreFileMetaData md : phase1Files) {
if (request.metadataSnapshot().asMap().containsKey(md.name())) {
logger.trace("recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
} else {
logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
}
response.phase1FileNames.add(md.name());
response.phase1FileSizes.add(md.length());
totalSize += md.length();
}
response.phase1TotalSize = totalSize;
response.phase1ExistingTotalSize = existingTotalSize;
logger.trace("recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", response.phase1FileNames.size(), new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize));
cancellableThreads.execute(() -> recoveryTarget.receiveFileInfo(response.phase1FileNames, response.phase1FileSizes, response.phase1ExistingFileNames, response.phase1ExistingFileSizes, translogView.totalOperations()));
// How many bytes we've copied since we last called RateLimiter.pause
final Function<StoreFileMetaData, OutputStream> outputStreamFactories = md -> new BufferedOutputStream(new RecoveryOutputStream(md, translogView), chunkSizeInBytes);
sendFiles(store, phase1Files.toArray(new StoreFileMetaData[phase1Files.size()]), outputStreamFactories);
// are deleted
try {
cancellableThreads.executeIO(() -> recoveryTarget.cleanFiles(translogView.totalOperations(), recoverySourceMetadata));
} catch (RemoteTransportException | IOException targetException) {
final IOException corruptIndexException;
// - maybe due to old segments without checksums or length only checks
if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(targetException)) != null) {
try {
final Store.MetadataSnapshot recoverySourceMetadata1 = store.getMetadata(snapshot);
StoreFileMetaData[] metadata = StreamSupport.stream(recoverySourceMetadata1.spliterator(), false).toArray(size -> new StoreFileMetaData[size]);
ArrayUtil.timSort(metadata, (o1, o2) -> {
// check small files first
return Long.compare(o1.length(), o2.length());
});
for (StoreFileMetaData md : metadata) {
cancellableThreads.checkForCancel();
logger.debug("checking integrity for file {} after remove corruption exception", md);
if (store.checkIntegrityNoException(md) == false) {
// we are corrupted on the primary -- fail!
shard.failShard("recovery", corruptIndexException);
logger.warn("Corrupted file detected {} checksum mismatch", md);
throw corruptIndexException;
}
}
} catch (IOException ex) {
targetException.addSuppressed(ex);
throw targetException;
}
// corruption has happened on the way to replica
RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but " + "checksums are ok", null);
exception.addSuppressed(targetException);
logger.warn((org.apache.logging.log4j.util.Supplier<?>) () -> new ParameterizedMessage("{} Remote file corruption during finalization of recovery on node {}. local checksum OK", shard.shardId(), request.targetNode()), corruptIndexException);
throw exception;
} else {
throw targetException;
}
}
}
logger.trace("recovery [phase1]: took [{}]", stopWatch.totalTime());
response.phase1Time = stopWatch.totalTime().millis();
} catch (Exception e) {
throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), e);
} finally {
store.decRef();
}
}
use of org.elasticsearch.common.unit.ByteSizeValue in project elasticsearch by elastic.
the class JvmGcMonitorService method logSlowGc.
static void logSlowGc(final Logger logger, final JvmMonitor.Threshold threshold, final long seq, final JvmMonitor.SlowGcEvent slowGcEvent, BiFunction<JvmStats, JvmStats, String> pools) {
final String name = slowGcEvent.currentGc.getName();
final long elapsed = slowGcEvent.elapsed;
final long totalGcCollectionCount = slowGcEvent.currentGc.getCollectionCount();
final long currentGcCollectionCount = slowGcEvent.collectionCount;
final TimeValue totalGcCollectionTime = slowGcEvent.currentGc.getCollectionTime();
final TimeValue currentGcCollectionTime = slowGcEvent.collectionTime;
final JvmStats lastJvmStats = slowGcEvent.lastJvmStats;
final JvmStats currentJvmStats = slowGcEvent.currentJvmStats;
final ByteSizeValue maxHeapUsed = slowGcEvent.maxHeapUsed;
switch(threshold) {
case WARN:
if (logger.isWarnEnabled()) {
logger.warn(SLOW_GC_LOG_MESSAGE, name, seq, totalGcCollectionCount, currentGcCollectionTime, currentGcCollectionCount, TimeValue.timeValueMillis(elapsed), currentGcCollectionTime, totalGcCollectionTime, lastJvmStats.getMem().getHeapUsed(), currentJvmStats.getMem().getHeapUsed(), maxHeapUsed, pools.apply(lastJvmStats, currentJvmStats));
}
break;
case INFO:
if (logger.isInfoEnabled()) {
logger.info(SLOW_GC_LOG_MESSAGE, name, seq, totalGcCollectionCount, currentGcCollectionTime, currentGcCollectionCount, TimeValue.timeValueMillis(elapsed), currentGcCollectionTime, totalGcCollectionTime, lastJvmStats.getMem().getHeapUsed(), currentJvmStats.getMem().getHeapUsed(), maxHeapUsed, pools.apply(lastJvmStats, currentJvmStats));
}
break;
case DEBUG:
if (logger.isDebugEnabled()) {
logger.debug(SLOW_GC_LOG_MESSAGE, name, seq, totalGcCollectionCount, currentGcCollectionTime, currentGcCollectionCount, TimeValue.timeValueMillis(elapsed), currentGcCollectionTime, totalGcCollectionTime, lastJvmStats.getMem().getHeapUsed(), currentJvmStats.getMem().getHeapUsed(), maxHeapUsed, pools.apply(lastJvmStats, currentJvmStats));
}
break;
}
}
Aggregations