use of com.facebook.presto.execution.QueryExecution in project presto by prestodb.
the class ClusterMemoryManager method process.
public synchronized void process(Iterable<QueryExecution> queries) {
if (!enabled) {
return;
}
boolean outOfMemory = isClusterOutOfMemory();
if (!outOfMemory) {
lastTimeNotOutOfMemory = System.nanoTime();
}
boolean queryKilled = false;
long totalBytes = 0;
for (QueryExecution query : queries) {
long bytes = query.getTotalMemoryReservation();
DataSize sessionMaxQueryMemory = getQueryMaxMemory(query.getSession());
long queryMemoryLimit = Math.min(maxQueryMemory.toBytes(), sessionMaxQueryMemory.toBytes());
totalBytes += bytes;
if (resourceOvercommit(query.getSession()) && outOfMemory) {
// If a query has requested resource overcommit, only kill it if the cluster has run out of memory
DataSize memory = succinctBytes(bytes);
query.fail(new PrestoException(CLUSTER_OUT_OF_MEMORY, format("The cluster is out of memory and %s=true, so this query was killed. It was using %s of memory", RESOURCE_OVERCOMMIT, memory)));
queryKilled = true;
}
if (!resourceOvercommit(query.getSession()) && bytes > queryMemoryLimit) {
DataSize maxMemory = succinctBytes(queryMemoryLimit);
query.fail(exceededGlobalLimit(maxMemory));
queryKilled = true;
}
}
clusterMemoryUsageBytes.set(totalBytes);
if (killOnOutOfMemory) {
boolean shouldKillQuery = nanosSince(lastTimeNotOutOfMemory).compareTo(killOnOutOfMemoryDelay) > 0 && outOfMemory;
boolean lastKilledQueryIsGone = (lastKilledQuery == null);
if (!lastKilledQueryIsGone) {
ClusterMemoryPool generalPool = pools.get(GENERAL_POOL);
if (generalPool != null) {
lastKilledQueryIsGone = generalPool.getQueryMemoryReservations().containsKey(lastKilledQuery);
}
}
if (shouldKillQuery && lastKilledQueryIsGone && !queryKilled) {
// Kill the biggest query in the general pool
QueryExecution biggestQuery = null;
long maxMemory = -1;
for (QueryExecution query : queries) {
long bytesUsed = query.getTotalMemoryReservation();
if (bytesUsed > maxMemory && query.getMemoryPool().getId().equals(GENERAL_POOL)) {
biggestQuery = query;
maxMemory = bytesUsed;
}
}
if (biggestQuery != null) {
biggestQuery.fail(new PrestoException(CLUSTER_OUT_OF_MEMORY, "The cluster is out of memory, and your query was killed. Please try again in a few minutes."));
queriesKilledDueToOutOfMemory.incrementAndGet();
lastKilledQuery = biggestQuery.getQueryId();
}
}
}
Map<MemoryPoolId, Integer> countByPool = new HashMap<>();
for (QueryExecution query : queries) {
MemoryPoolId id = query.getMemoryPool().getId();
countByPool.put(id, countByPool.getOrDefault(id, 0) + 1);
}
updatePools(countByPool);
updateNodes(updateAssignments(queries));
// check if CPU usage is over limit
for (QueryExecution query : queries) {
Duration cpuTime = query.getTotalCpuTime();
Duration sessionLimit = getQueryMaxCpuTime(query.getSession());
Duration limit = maxQueryCpuTime.compareTo(sessionLimit) < 0 ? maxQueryCpuTime : sessionLimit;
if (cpuTime.compareTo(limit) > 0) {
query.fail(new ExceededCpuLimitException(limit));
}
}
}
use of com.facebook.presto.execution.QueryExecution in project presto by prestodb.
the class ClusterMemoryManager method process.
public synchronized void process(Iterable<QueryExecution> runningQueries, Supplier<List<BasicQueryInfo>> allQueryInfoSupplier) {
if (!enabled) {
return;
}
// TODO revocable memory reservations can also leak and may need to be detected in the future
// We are only concerned about the leaks in general pool.
memoryLeakDetector.checkForMemoryLeaks(allQueryInfoSupplier, pools.get(GENERAL_POOL).getQueryMemoryReservations());
boolean outOfMemory = isClusterOutOfMemory();
if (!outOfMemory) {
lastTimeNotOutOfMemory = System.nanoTime();
}
boolean queryKilled = false;
long totalUserMemoryBytes = 0L;
long totalMemoryBytes = 0L;
for (QueryExecution query : runningQueries) {
boolean resourceOvercommit = resourceOvercommit(query.getSession());
long userMemoryReservation = query.getUserMemoryReservation().toBytes();
long totalMemoryReservation = query.getTotalMemoryReservation().toBytes();
if (resourceOvercommit && outOfMemory) {
// If a query has requested resource overcommit, only kill it if the cluster has run out of memory
DataSize memory = succinctBytes(getQueryMemoryReservation(query));
query.fail(new PrestoException(CLUSTER_OUT_OF_MEMORY, format("The cluster is out of memory and %s=true, so this query was killed. It was using %s of memory", RESOURCE_OVERCOMMIT, memory)));
queryKilled = true;
}
if (!resourceOvercommit) {
long userMemoryLimit = min(maxQueryMemory.toBytes(), getQueryMaxMemory(query.getSession()).toBytes());
if (userMemoryReservation > userMemoryLimit) {
query.fail(exceededGlobalUserLimit(succinctBytes(userMemoryLimit)));
queryKilled = true;
}
QueryLimit<DataSize> queryTotalMemoryLimit = getMinimum(createDataSizeLimit(maxQueryTotalMemory, SYSTEM), query.getResourceGroupQueryLimits().flatMap(ResourceGroupQueryLimits::getTotalMemoryLimit).map(rgLimit -> createDataSizeLimit(rgLimit, RESOURCE_GROUP)).orElse(null), createDataSizeLimit(getQueryMaxTotalMemory(query.getSession()), QUERY));
if (totalMemoryReservation > queryTotalMemoryLimit.getLimit().toBytes()) {
query.fail(exceededGlobalTotalLimit(queryTotalMemoryLimit.getLimit(), queryTotalMemoryLimit.getLimitSource().name()));
queryKilled = true;
}
}
totalUserMemoryBytes += userMemoryReservation;
totalMemoryBytes += totalMemoryReservation;
}
clusterUserMemoryReservation.set(totalUserMemoryBytes);
clusterTotalMemoryReservation.set(totalMemoryBytes);
boolean killOnOomDelayPassed = nanosSince(lastTimeNotOutOfMemory).compareTo(killOnOutOfMemoryDelay) > 0;
boolean lastKilledQueryGone = isLastKilledQueryGone();
boolean shouldCallOomKiller = !(lowMemoryKiller instanceof NoneLowMemoryKiller) && outOfMemory && !queryKilled && killOnOomDelayPassed && lastKilledQueryGone;
if (shouldCallOomKiller) {
callOomKiller(runningQueries);
} else {
// if the cluster is out of memory and we didn't trigger the oom killer we log the state to make debugging easier
if (outOfMemory) {
log.debug("The cluster is out of memory and the OOM killer is not called (query killed: %s, kill on OOM delay passed: %s, last killed query gone: %s).", queryKilled, killOnOomDelayPassed, lastKilledQueryGone);
}
}
Map<MemoryPoolId, Integer> countByPool = new HashMap<>();
for (QueryExecution query : runningQueries) {
MemoryPoolId id = query.getMemoryPool().getId();
countByPool.put(id, countByPool.getOrDefault(id, 0) + 1);
}
updatePools(countByPool);
MemoryPoolAssignmentsRequest assignmentsRequest;
if (pools.containsKey(RESERVED_POOL)) {
assignmentsRequest = updateAssignments(runningQueries);
} else {
// If reserved pool is not enabled, we don't create a MemoryPoolAssignmentsRequest that puts all the queries
// in the general pool (as they already are). In this case we create an effectively NOOP MemoryPoolAssignmentsRequest.
// Once the reserved pool is removed we should get rid of the logic of putting queries into reserved pool including
// this piece of code.
assignmentsRequest = new MemoryPoolAssignmentsRequest(coordinatorId, Long.MIN_VALUE, ImmutableList.of());
}
updateNodes(assignmentsRequest);
}
use of com.facebook.presto.execution.QueryExecution in project presto by prestodb.
the class ClusterMemoryManager method findLargestMemoryQuery.
private QueryExecution findLargestMemoryQuery(ClusterMemoryPoolInfo generalPool, Iterable<QueryExecution> queries) {
QueryExecution biggestQuery = null;
long maxMemory = -1;
Optional<QueryId> largestMemoryQuery = generalPool.getLargestMemoryQuery();
// If present, this means the resource manager is determining the largest query, so do not make this determination locally
if (memoryManagerService.isPresent()) {
return largestMemoryQuery.flatMap(largestMemoryQueryId -> Streams.stream(queries).filter(query -> query.getQueryId().equals(largestMemoryQueryId)).findFirst()).orElse(null);
}
for (QueryExecution queryExecution : queries) {
if (resourceOvercommit(queryExecution.getSession())) {
// since their memory usage is unbounded.
continue;
}
long bytesUsed = getQueryMemoryReservation(queryExecution);
if (bytesUsed > maxMemory) {
biggestQuery = queryExecution;
maxMemory = bytesUsed;
}
}
return biggestQuery;
}
use of com.facebook.presto.execution.QueryExecution in project presto by prestodb.
the class ClusterMemoryManager method updateAssignments.
// TODO once the reserved pool is removed we can remove this method. We can also update
// RemoteNodeMemory as we don't need to POST anything.
private synchronized MemoryPoolAssignmentsRequest updateAssignments(Iterable<QueryExecution> queries) {
ClusterMemoryPoolInfo reservedPool = getClusterInfo(RESERVED_POOL);
ClusterMemoryPoolInfo generalPool = getClusterInfo(GENERAL_POOL);
verify(generalPool != null, "generalPool is null");
verify(reservedPool != null, "reservedPool is null");
long version = memoryPoolAssignmentsVersion.incrementAndGet();
// and is more of a safety check than a guarantee
if (allAssignmentsHavePropagated(queries)) {
if (reservedPool.getAssignedQueries() == 0 && generalPool.getBlockedNodes() > 0) {
QueryExecution biggestQuery = findLargestMemoryQuery(generalPool, queries);
if (biggestQuery != null) {
log.info("Moving query %s to the reserved pool", biggestQuery.getQueryId());
biggestQuery.setMemoryPool(new VersionedMemoryPoolId(RESERVED_POOL, version));
}
}
}
ImmutableList.Builder<MemoryPoolAssignment> assignments = ImmutableList.builder();
for (QueryExecution queryExecution : queries) {
assignments.add(new MemoryPoolAssignment(queryExecution.getQueryId(), queryExecution.getMemoryPool().getId()));
}
return new MemoryPoolAssignmentsRequest(coordinatorId, version, assignments.build());
}
use of com.facebook.presto.execution.QueryExecution in project presto by prestodb.
the class LocalDispatchQueryFactory method createDispatchQuery.
@Override
public DispatchQuery createDispatchQuery(Session session, String query, PreparedQuery preparedQuery, String slug, int retryCount, ResourceGroupId resourceGroup, Optional<QueryType> queryType, WarningCollector warningCollector, Consumer<DispatchQuery> queryQueuer) {
QueryStateMachine stateMachine = QueryStateMachine.begin(query, preparedQuery.getPrepareSql(), session, locationFactory.createQueryLocation(session.getQueryId()), resourceGroup, queryType, isTransactionControlStatement(preparedQuery.getStatement()), transactionManager, accessControl, executor, metadata, warningCollector);
stateMachine.addStateChangeListener(new QueryStateTracingListener(stateMachine.getSession().getTracer().orElse(NoopTracerProvider.NOOP_TRACER)));
queryMonitor.queryCreatedEvent(stateMachine.getBasicQueryInfo(Optional.empty()));
ListenableFuture<QueryExecution> queryExecutionFuture = executor.submit(() -> {
QueryExecutionFactory<?> queryExecutionFactory = executionFactories.get(preparedQuery.getStatement().getClass());
if (queryExecutionFactory == null) {
throw new PrestoException(NOT_SUPPORTED, "Unsupported statement type: " + preparedQuery.getStatement().getClass().getSimpleName());
}
return queryExecutionFactory.createQueryExecution(preparedQuery, stateMachine, slug, retryCount, warningCollector, queryType);
});
return new LocalDispatchQuery(stateMachine, queryMonitor, queryExecutionFuture, clusterSizeMonitor, executor, queryQueuer, queryManager::createQuery, retryCount > 0, queryPrerequisitesManager);
}
Aggregations