use of com.facebook.presto.spi.storage.TempStorage in project presto by prestodb.
the class PrestoSparkTaskExecutorFactory method doCreate.
public <T extends PrestoSparkTaskOutput> IPrestoSparkTaskExecutor<T> doCreate(int partitionId, int attemptNumber, SerializedPrestoSparkTaskDescriptor serializedTaskDescriptor, Iterator<SerializedPrestoSparkTaskSource> serializedTaskSources, PrestoSparkTaskInputs inputs, CollectionAccumulator<SerializedTaskInfo> taskInfoCollector, CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector, Class<T> outputType) {
PrestoSparkTaskDescriptor taskDescriptor = taskDescriptorJsonCodec.fromJson(serializedTaskDescriptor.getBytes());
ImmutableMap.Builder<String, TokenAuthenticator> extraAuthenticators = ImmutableMap.builder();
authenticatorProviders.forEach(provider -> extraAuthenticators.putAll(provider.getTokenAuthenticators()));
Session session = taskDescriptor.getSession().toSession(sessionPropertyManager, taskDescriptor.getExtraCredentials(), extraAuthenticators.build());
PlanFragment fragment = taskDescriptor.getFragment();
StageId stageId = new StageId(session.getQueryId(), fragment.getId().getId());
// Clear the cache if the cache does not have broadcast table for current stageId.
// We will only cache 1 HT at any time. If the stageId changes, we will drop the old cached HT
prestoSparkBroadcastTableCacheManager.removeCachedTablesForStagesOtherThan(stageId);
// TODO: include attemptId in taskId
TaskId taskId = new TaskId(new StageExecutionId(stageId, 0), partitionId);
List<TaskSource> taskSources = getTaskSources(serializedTaskSources);
log.info("Task [%s] received %d splits.", taskId, taskSources.stream().mapToInt(taskSource -> taskSource.getSplits().size()).sum());
OptionalLong totalSplitSize = computeAllSplitsSize(taskSources);
if (totalSplitSize.isPresent()) {
log.info("Total split size: %s bytes.", totalSplitSize.getAsLong());
}
// TODO: Remove this once we can display the plan on Spark UI.
log.info(PlanPrinter.textPlanFragment(fragment, functionAndTypeManager, session, true));
DataSize maxUserMemory = new DataSize(min(nodeMemoryConfig.getMaxQueryMemoryPerNode().toBytes(), getQueryMaxMemoryPerNode(session).toBytes()), BYTE);
DataSize maxTotalMemory = new DataSize(min(nodeMemoryConfig.getMaxQueryTotalMemoryPerNode().toBytes(), getQueryMaxTotalMemoryPerNode(session).toBytes()), BYTE);
DataSize maxBroadcastMemory = getSparkBroadcastJoinMaxMemoryOverride(session);
if (maxBroadcastMemory == null) {
maxBroadcastMemory = new DataSize(min(nodeMemoryConfig.getMaxQueryBroadcastMemory().toBytes(), getQueryMaxBroadcastMemory(session).toBytes()), BYTE);
}
MemoryPool memoryPool = new MemoryPool(new MemoryPoolId("spark-executor-memory-pool"), maxTotalMemory);
SpillSpaceTracker spillSpaceTracker = new SpillSpaceTracker(maxQuerySpillPerNode);
QueryContext queryContext = new QueryContext(session.getQueryId(), maxUserMemory, maxTotalMemory, maxBroadcastMemory, maxRevocableMemory, memoryPool, new TestingGcMonitor(), notificationExecutor, yieldExecutor, maxQuerySpillPerNode, spillSpaceTracker, memoryReservationSummaryJsonCodec);
queryContext.setVerboseExceededMemoryLimitErrorsEnabled(isVerboseExceededMemoryLimitErrorsEnabled(session));
queryContext.setHeapDumpOnExceededMemoryLimitEnabled(isHeapDumpOnExceededMemoryLimitEnabled(session));
String heapDumpFilePath = Paths.get(getHeapDumpFileDirectory(session), format("%s_%s.hprof", session.getQueryId().getId(), stageId.getId())).toString();
queryContext.setHeapDumpFilePath(heapDumpFilePath);
TaskStateMachine taskStateMachine = new TaskStateMachine(taskId, notificationExecutor);
TaskContext taskContext = queryContext.addTaskContext(taskStateMachine, session, // Plan has to be retained only if verbose memory exceeded errors are requested
isVerboseExceededMemoryLimitErrorsEnabled(session) ? Optional.of(fragment.getRoot()) : Optional.empty(), perOperatorCpuTimerEnabled, cpuTimerEnabled, perOperatorAllocationTrackingEnabled, allocationTrackingEnabled, false);
final double memoryRevokingThreshold = getMemoryRevokingThreshold(session);
final double memoryRevokingTarget = getMemoryRevokingTarget(session);
checkArgument(memoryRevokingTarget <= memoryRevokingThreshold, "memoryRevokingTarget should be less than or equal memoryRevokingThreshold, but got %s and %s respectively", memoryRevokingTarget, memoryRevokingThreshold);
if (isSpillEnabled(session)) {
memoryPool.addListener((pool, queryId, totalMemoryReservationBytes) -> {
if (totalMemoryReservationBytes > queryContext.getPeakNodeTotalMemory()) {
queryContext.setPeakNodeTotalMemory(totalMemoryReservationBytes);
}
if (totalMemoryReservationBytes > pool.getMaxBytes() * memoryRevokingThreshold && memoryRevokeRequestInProgress.compareAndSet(false, true)) {
memoryRevocationExecutor.execute(() -> {
try {
AtomicLong remainingBytesToRevoke = new AtomicLong(totalMemoryReservationBytes - (long) (memoryRevokingTarget * pool.getMaxBytes()));
remainingBytesToRevoke.addAndGet(-MemoryRevokingSchedulerUtils.getMemoryAlreadyBeingRevoked(ImmutableList.of(taskContext), remainingBytesToRevoke.get()));
taskContext.accept(new VoidTraversingQueryContextVisitor<AtomicLong>() {
@Override
public Void visitOperatorContext(OperatorContext operatorContext, AtomicLong remainingBytesToRevoke) {
if (remainingBytesToRevoke.get() > 0) {
long revokedBytes = operatorContext.requestMemoryRevoking();
if (revokedBytes > 0) {
memoryRevokePending.set(true);
remainingBytesToRevoke.addAndGet(-revokedBytes);
}
}
return null;
}
}, remainingBytesToRevoke);
memoryRevokeRequestInProgress.set(false);
} catch (Exception e) {
log.error(e, "Error requesting memory revoking");
}
});
}
// Get the latest memory reservation info since it might have changed due to revoke
long totalReservedMemory = pool.getQueryMemoryReservation(queryId) + pool.getQueryRevocableMemoryReservation(queryId);
// If total memory usage is over maxTotalMemory and memory revoke request is not pending, fail the query with EXCEEDED_MEMORY_LIMIT error
if (totalReservedMemory > maxTotalMemory.toBytes() && !memoryRevokeRequestInProgress.get() && !isMemoryRevokePending(taskContext)) {
throw exceededLocalTotalMemoryLimit(maxTotalMemory, queryContext.getAdditionalFailureInfo(totalReservedMemory, 0) + format("Total reserved memory: %s, Total revocable memory: %s", succinctBytes(pool.getQueryMemoryReservation(queryId)), succinctBytes(pool.getQueryRevocableMemoryReservation(queryId))), isHeapDumpOnExceededMemoryLimitEnabled(session), Optional.ofNullable(heapDumpFilePath));
}
});
}
ImmutableMap.Builder<PlanNodeId, List<PrestoSparkShuffleInput>> shuffleInputs = ImmutableMap.builder();
ImmutableMap.Builder<PlanNodeId, List<java.util.Iterator<PrestoSparkSerializedPage>>> pageInputs = ImmutableMap.builder();
ImmutableMap.Builder<PlanNodeId, List<?>> broadcastInputs = ImmutableMap.builder();
for (RemoteSourceNode remoteSource : fragment.getRemoteSourceNodes()) {
List<PrestoSparkShuffleInput> remoteSourceRowInputs = new ArrayList<>();
List<java.util.Iterator<PrestoSparkSerializedPage>> remoteSourcePageInputs = new ArrayList<>();
List<List<?>> broadcastInputsList = new ArrayList<>();
for (PlanFragmentId sourceFragmentId : remoteSource.getSourceFragmentIds()) {
Iterator<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> shuffleInput = inputs.getShuffleInputs().get(sourceFragmentId.toString());
Broadcast<?> broadcastInput = inputs.getBroadcastInputs().get(sourceFragmentId.toString());
List<PrestoSparkSerializedPage> inMemoryInput = inputs.getInMemoryInputs().get(sourceFragmentId.toString());
if (shuffleInput != null) {
checkArgument(broadcastInput == null, "single remote source is not expected to accept different kind of inputs");
checkArgument(inMemoryInput == null, "single remote source is not expected to accept different kind of inputs");
remoteSourceRowInputs.add(new PrestoSparkShuffleInput(sourceFragmentId.getId(), shuffleInput));
continue;
}
if (broadcastInput != null) {
checkArgument(inMemoryInput == null, "single remote source is not expected to accept different kind of inputs");
// TODO: Enable NullifyingIterator once migrated to one task per JVM model
// NullifyingIterator removes element from the list upon return
// This allows GC to gradually reclaim memory
// remoteSourcePageInputs.add(getNullifyingIterator(broadcastInput.value()));
broadcastInputsList.add((List<?>) broadcastInput.value());
continue;
}
if (inMemoryInput != null) {
// for inmemory inputs pages can be released incrementally to save memory
remoteSourcePageInputs.add(getNullifyingIterator(inMemoryInput));
continue;
}
throw new IllegalArgumentException("Input not found for sourceFragmentId: " + sourceFragmentId);
}
if (!remoteSourceRowInputs.isEmpty()) {
shuffleInputs.put(remoteSource.getId(), remoteSourceRowInputs);
}
if (!remoteSourcePageInputs.isEmpty()) {
pageInputs.put(remoteSource.getId(), remoteSourcePageInputs);
}
if (!broadcastInputsList.isEmpty()) {
broadcastInputs.put(remoteSource.getId(), broadcastInputsList);
}
}
OutputBufferMemoryManager memoryManager = new OutputBufferMemoryManager(sinkMaxBufferSize.toBytes(), () -> queryContext.getTaskContextByTaskId(taskId).localSystemMemoryContext(), notificationExecutor);
Optional<OutputPartitioning> preDeterminedPartition = Optional.empty();
if (fragment.getPartitioningScheme().getPartitioning().getHandle().equals(FIXED_ARBITRARY_DISTRIBUTION)) {
int partitionCount = getHashPartitionCount(session);
preDeterminedPartition = Optional.of(new OutputPartitioning(new PreDeterminedPartitionFunction(partitionId % partitionCount, partitionCount), ImmutableList.of(), ImmutableList.of(), false, OptionalInt.empty()));
}
TempDataOperationContext tempDataOperationContext = new TempDataOperationContext(session.getSource(), session.getQueryId().getId(), session.getClientInfo(), Optional.of(session.getClientTags()), session.getIdentity());
TempStorage tempStorage = tempStorageManager.getTempStorage(storageBasedBroadcastJoinStorage);
Output<T> output = configureOutput(outputType, blockEncodingManager, memoryManager, getShuffleOutputTargetAverageRowSize(session), preDeterminedPartition, tempStorage, tempDataOperationContext, getStorageBasedBroadcastJoinWriteBufferSize(session));
PrestoSparkOutputBuffer<?> outputBuffer = output.getOutputBuffer();
LocalExecutionPlan localExecutionPlan = localExecutionPlanner.plan(taskContext, fragment.getRoot(), fragment.getPartitioningScheme(), fragment.getStageExecutionDescriptor(), fragment.getTableScanSchedulingOrder(), output.getOutputFactory(), new PrestoSparkRemoteSourceFactory(blockEncodingManager, shuffleInputs.build(), pageInputs.build(), broadcastInputs.build(), partitionId, shuffleStatsCollector, tempStorage, tempDataOperationContext, prestoSparkBroadcastTableCacheManager, stageId), taskDescriptor.getTableWriteInfo(), true);
taskStateMachine.addStateChangeListener(state -> {
if (state.isDone()) {
outputBuffer.setNoMoreRows();
}
});
PrestoSparkTaskExecution taskExecution = new PrestoSparkTaskExecution(taskStateMachine, taskContext, localExecutionPlan, taskExecutor, splitMonitor, notificationExecutor, memoryUpdateExecutor);
taskExecution.start(taskSources);
return new PrestoSparkTaskExecutor<>(taskContext, taskStateMachine, output.getOutputSupplier(), taskInfoCodec, taskInfoCollector, shuffleStatsCollector, executionExceptionFactory, output.getOutputBufferType(), outputBuffer, tempStorage, tempDataOperationContext);
}
use of com.facebook.presto.spi.storage.TempStorage in project presto by prestodb.
the class TempStorageManager method getTempStorage.
public TempStorage getTempStorage(String name) {
TempStorage tempStorage = loadedTempStorages.get(name);
checkState(tempStorage != null, "tempStorage %s was not loaded", name);
return tempStorage;
}
use of com.facebook.presto.spi.storage.TempStorage in project presto by prestodb.
the class TempStorageManager method loadTempStorage.
protected void loadTempStorage(String name, Map<String, String> properties) {
requireNonNull(name, "name is null");
requireNonNull(properties, "properties is null");
log.info("-- Loading temp storage %s --", name);
String tempStorageFactoryName = null;
ImmutableMap.Builder<String, String> tempStorageProperties = ImmutableMap.builder();
for (Map.Entry<String, String> entry : properties.entrySet()) {
if (entry.getKey().equals(TEMP_STORAGE_FACTORY_NAME)) {
tempStorageFactoryName = entry.getValue();
} else {
tempStorageProperties.put(entry.getKey(), entry.getValue());
}
}
checkState(tempStorageFactoryName != null, "Configuration for tempStorage %s does not contain temp-storage-factory.name", name);
TempStorageFactory factory = tempStorageFactories.get(tempStorageFactoryName);
checkState(factory != null, "Temp Storage Factory %s is not registered", tempStorageFactoryName);
TempStorage tempStorage = factory.create(tempStorageProperties.build(), new TempStorageContext(nodeManager));
if (loadedTempStorages.putIfAbsent(name, tempStorage) != null) {
throw new IllegalArgumentException(format("Temp Storage '%s' is already loaded", name));
}
log.info("-- Loaded temp storage %s --", name);
}
use of com.facebook.presto.spi.storage.TempStorage in project presto by prestodb.
the class TempStorageStandaloneSpillerFactory method create.
public TempStorageStandaloneSpiller create(Session session) {
TempDataOperationContext tempDataOperationContext = new TempDataOperationContext(session.getSource(), session.getQueryId().getId(), session.getClientInfo(), Optional.of(session.getClientTags()), session.getIdentity());
TempStorage tempStorage = tempStorageManager.getTempStorage(tempStorageName);
return new TempStorageStandaloneSpiller(tempDataOperationContext, tempStorage, serdeFactory.createPagesSerde(), spillerStats, toIntExact(getTempStorageSpillerBufferSize(session).toBytes()));
}
use of com.facebook.presto.spi.storage.TempStorage in project presto by prestodb.
the class PrestoSparkQueryExecutionFactory method create.
@Override
public IPrestoSparkQueryExecution create(SparkContext sparkContext, PrestoSparkSession prestoSparkSession, Optional<String> sqlText, Optional<String> sqlLocation, Optional<String> sqlFileHexHash, Optional<String> sqlFileSizeInBytes, Optional<String> sparkQueueName, PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider, Optional<String> queryStatusInfoOutputLocation, Optional<String> queryDataOutputLocation) {
PrestoSparkConfInitializer.checkInitialized(sparkContext);
String sql;
if (sqlText.isPresent()) {
checkArgument(!sqlLocation.isPresent(), "sqlText and sqlLocation should not be set at the same time");
sql = sqlText.get();
} else {
checkArgument(sqlLocation.isPresent(), "sqlText or sqlLocation must be present");
byte[] sqlFileBytes = metadataStorage.read(sqlLocation.get());
if (sqlFileSizeInBytes.isPresent()) {
if (Integer.valueOf(sqlFileSizeInBytes.get()) != sqlFileBytes.length) {
throw new PrestoException(MALFORMED_QUERY_FILE, format("sql file size %s is different from expected sqlFileSizeInBytes %s", sqlFileBytes.length, sqlFileSizeInBytes.get()));
}
}
if (sqlFileHexHash.isPresent()) {
try {
MessageDigest md = MessageDigest.getInstance("SHA-512");
String actualHexHashCode = BaseEncoding.base16().lowerCase().encode(md.digest(sqlFileBytes));
if (!sqlFileHexHash.get().equals(actualHexHashCode)) {
throw new PrestoException(MALFORMED_QUERY_FILE, format("actual hash code %s is different from expected sqlFileHexHash %s", actualHexHashCode, sqlFileHexHash.get()));
}
} catch (NoSuchAlgorithmException e) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "unsupported hash algorithm", e);
}
}
sql = new String(sqlFileBytes, UTF_8);
}
log.info("Query: %s", sql);
QueryStateTimer queryStateTimer = new QueryStateTimer(systemTicker());
queryStateTimer.beginPlanning();
QueryId queryId = queryIdGenerator.createNextQueryId();
log.info("Starting execution for presto query: %s", queryId);
System.out.printf("Query id: %s\n", queryId);
sparkContext.conf().set(PRESTO_QUERY_ID_CONFIG, queryId.getId());
SessionContext sessionContext = PrestoSparkSessionContext.createFromSessionInfo(prestoSparkSession, credentialsProviders, authenticatorProviders);
Session session = sessionSupplier.createSession(queryId, sessionContext);
session = sessionPropertyDefaults.newSessionWithDefaultProperties(session, Optional.empty(), Optional.empty());
WarningCollector warningCollector = warningCollectorFactory.create(getWarningHandlingLevel(session));
PlanAndMore planAndMore = null;
try {
TransactionId transactionId = transactionManager.beginTransaction(true);
session = session.beginTransactionId(transactionId, transactionManager, accessControl);
queryMonitor.queryCreatedEvent(new BasicQueryInfo(createQueryInfo(session, sql, PLANNING, Optional.empty(), sparkQueueName, Optional.empty(), queryStateTimer, Optional.empty(), warningCollector)));
// including queueing time
Duration queryMaxRunTime = getQueryMaxRunTime(session);
// excluding queueing time
Duration queryMaxExecutionTime = getQueryMaxExecutionTime(session);
// pick a smaller one as we are not tracking queueing for Presto on Spark
Duration queryTimeout = queryMaxRunTime.compareTo(queryMaxExecutionTime) < 0 ? queryMaxRunTime : queryMaxExecutionTime;
long queryCompletionDeadline = System.currentTimeMillis() + queryTimeout.toMillis();
settingsRequirements.verify(sparkContext, session);
queryStateTimer.beginAnalyzing();
PreparedQuery preparedQuery = queryPreparer.prepareQuery(session, sql, warningCollector);
Optional<QueryType> queryType = StatementUtils.getQueryType(preparedQuery.getStatement().getClass());
if (queryType.isPresent() && (queryType.get() == QueryType.DATA_DEFINITION)) {
queryStateTimer.endAnalysis();
DDLDefinitionTask<?> task = (DDLDefinitionTask<?>) ddlTasks.get(preparedQuery.getStatement().getClass());
return new PrestoSparkDataDefinitionExecution(task, preparedQuery.getStatement(), transactionManager, accessControl, metadata, session, queryStateTimer, warningCollector);
} else {
planAndMore = queryPlanner.createQueryPlan(session, preparedQuery, warningCollector);
SubPlan fragmentedPlan = planFragmenter.fragmentQueryPlan(session, planAndMore.getPlan(), warningCollector);
log.info(textDistributedPlan(fragmentedPlan, metadata.getFunctionAndTypeManager(), session, true));
fragmentedPlan = configureOutputPartitioning(session, fragmentedPlan);
TableWriteInfo tableWriteInfo = getTableWriteInfo(session, fragmentedPlan);
JavaSparkContext javaSparkContext = new JavaSparkContext(sparkContext);
CollectionAccumulator<SerializedTaskInfo> taskInfoCollector = new CollectionAccumulator<>();
taskInfoCollector.register(sparkContext, Option.empty(), false);
CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector = new CollectionAccumulator<>();
shuffleStatsCollector.register(sparkContext, Option.empty(), false);
TempStorage tempStorage = tempStorageManager.getTempStorage(storageBasedBroadcastJoinStorage);
queryStateTimer.endAnalysis();
return new PrestoSparkQueryExecution(javaSparkContext, session, queryMonitor, taskInfoCollector, shuffleStatsCollector, prestoSparkTaskExecutorFactory, executorFactoryProvider, queryStateTimer, warningCollector, sql, planAndMore, fragmentedPlan, sparkQueueName, taskInfoCodec, sparkTaskDescriptorJsonCodec, queryStatusInfoJsonCodec, queryDataJsonCodec, rddFactory, tableWriteInfo, transactionManager, createPagesSerde(blockEncodingManager), executionExceptionFactory, queryTimeout, queryCompletionDeadline, metadataStorage, queryStatusInfoOutputLocation, queryDataOutputLocation, tempStorage, nodeMemoryConfig, waitTimeMetrics);
}
} catch (Throwable executionFailure) {
queryStateTimer.beginFinishing();
try {
rollback(session, transactionManager);
} catch (RuntimeException rollbackFailure) {
log.error(rollbackFailure, "Encountered error when performing rollback");
}
queryStateTimer.endQuery();
Optional<ExecutionFailureInfo> failureInfo = Optional.empty();
if (executionFailure instanceof PrestoSparkExecutionException) {
failureInfo = executionExceptionFactory.extractExecutionFailureInfo((PrestoSparkExecutionException) executionFailure);
verify(failureInfo.isPresent());
}
if (!failureInfo.isPresent()) {
failureInfo = Optional.of(toFailure(executionFailure));
}
try {
QueryInfo queryInfo = createQueryInfo(session, sql, FAILED, Optional.ofNullable(planAndMore), sparkQueueName, failureInfo, queryStateTimer, Optional.empty(), warningCollector);
queryMonitor.queryCompletedEvent(queryInfo);
if (queryStatusInfoOutputLocation.isPresent()) {
PrestoSparkQueryStatusInfo prestoSparkQueryStatusInfo = createPrestoSparkQueryInfo(queryInfo, Optional.ofNullable(planAndMore), warningCollector, OptionalLong.empty());
metadataStorage.write(queryStatusInfoOutputLocation.get(), queryStatusInfoJsonCodec.toJsonBytes(prestoSparkQueryStatusInfo));
}
} catch (RuntimeException eventFailure) {
log.error(eventFailure, "Error publishing query immediate failure event");
}
throw failureInfo.get().toFailure();
}
}
Aggregations