Search in sources:

Example 1 with PrestoSparkShuffleStats

use of com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats in project presto by prestodb.

The method getNextPage of the class PrestoSparkShufflePageInput.

/**
 * Assembles the next page from the shuffle inputs, resuming at {@code currentIteratorIndex}.
 *
 * <p>Row batches are appended to a single output buffer until the buffer size exceeds
 * {@code TARGET_SIZE}, the accumulated row count exceeds {@code MAX_ROWS_PER_PAGE}, or every
 * shuffle input has been drained. Each batch carries its row count in its first two bytes;
 * those two bytes are stripped and the remainder is copied to the output. Per-input read
 * statistics are accumulated in {@code shuffleStats} and published to
 * {@code shuffleStatsCollector} once the input's iterator is exhausted.
 *
 * @param updateMemory not used by this method
 * @return the page built from the collected bytes, or {@code null} when no rows were read
 * @throws IllegalArgumentException if a row has neither a buffer nor an array
 */
@Override
public Page getNextPage(UpdateMemory updateMemory) {
    int initialCapacity = types.isEmpty() ? 0 : BUFFER_SIZE;
    SliceOutput pageBytes = new DynamicSliceOutput(initialCapacity);
    int totalRows = 0;
    synchronized (this) {
        while (currentIteratorIndex < shuffleInputs.size()) {
            PrestoSparkShuffleInput shuffleInput = shuffleInputs.get(currentIteratorIndex);
            Iterator<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> rows = shuffleInput.getIterator();
            long bytesRead = 0;
            long rowsRead = 0;
            long batchesRead = 0;
            long startMillis = System.currentTimeMillis();
            while (rows.hasNext() && pageBytes.size() <= TARGET_SIZE && totalRows <= MAX_ROWS_PER_PAGE) {
                batchesRead++;
                PrestoSparkMutableRow entry = rows.next()._2;

                // Case 1: the batch is backed by a ByteBuffer.
                if (entry.getBuffer() != null) {
                    ByteBuffer data = entry.getBuffer();
                    verify(data.remaining() >= 2, "row data is expected to be at least 2 bytes long");
                    bytesRead += data.remaining();
                    // NOTE(review): presumably an absolute read that leaves the position untouched,
                    // since the position is advanced past the 2-byte header explicitly below -- confirm.
                    short bufferRows = getShortLittleEndian(data);
                    totalRows += bufferRows;
                    rowsRead += bufferRows;
                    data.position(data.position() + 2);
                    byte[] backingArray = data.array();
                    int payloadOffset = data.arrayOffset() + data.position();
                    int payloadLength = data.remaining();
                    pageBytes.writeBytes(backingArray, payloadOffset, payloadLength);
                    continue;
                }

                // Neither representation is available: the row is malformed.
                if (entry.getArray() == null) {
                    throw new IllegalArgumentException("Unexpected PrestoSparkMutableRow: 'buffer' and 'array' fields are both null");
                }

                // Case 2: the batch is backed by a byte array with an explicit offset and length.
                verify(entry.getLength() >= 2, "row data is expected to be at least 2 bytes long");
                bytesRead += entry.getLength();
                short arrayRows = getShortLittleEndian(entry.getArray(), entry.getOffset());
                totalRows += arrayRows;
                rowsRead += arrayRows;
                pageBytes.writeBytes(entry.getArray(), entry.getOffset() + 2, entry.getLength() - 2);
            }
            long elapsedMillis = System.currentTimeMillis() - startMillis;
            shuffleStats.accumulate(rowsRead, batchesRead, bytesRead, elapsedMillis);

            // The page filled up before this input was drained: resume from the same input next call.
            if (rows.hasNext()) {
                break;
            }

            // Input fully consumed: publish its totals, clear the running stats, move to the next input.
            shuffleStatsCollector.add(new PrestoSparkShuffleStats(shuffleInput.getFragmentId(), taskId, READ, shuffleStats.getProcessedRows(), shuffleStats.getProcessedRowBatches(), shuffleStats.getProcessedBytes(), shuffleStats.getElapsedWallTimeMills()));
            shuffleStats.reset();
            currentIteratorIndex++;
        }
    }
    return totalRows == 0 ? null : createPage(totalRows, pageBytes.slice().getInput(), types);
}
Also used : SliceOutput(io.airlift.slice.SliceOutput) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) PrestoSparkShuffleStats(com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats) Tuple2(scala.Tuple2) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) ByteBuffer(java.nio.ByteBuffer) PrestoSparkMutableRow(com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow)

Example 2 with PrestoSparkShuffleStats

use of com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats in project presto by prestodb.

The method create of the class PrestoSparkQueryExecutionFactory.

/**
 * Creates a query execution for a SQL statement supplied either inline ({@code sqlText}) or as a
 * location ({@code sqlLocation}) whose bytes are read through {@code metadataStorage}.
 *
 * <p>Steps, in order: resolve and optionally validate the SQL text, allocate a query id and
 * session, begin a transaction, publish the query-created event, prepare the statement, and then
 * either return a {@link PrestoSparkDataDefinitionExecution} (for {@code DATA_DEFINITION}
 * statements) or plan, fragment and return a {@link PrestoSparkQueryExecution}.
 *
 * <p>Failures raised from the transaction start onwards are handled here: the transaction is
 * rolled back, a query-completed event with state {@code FAILED} is published, the status info is
 * written to {@code queryStatusInfoOutputLocation} when present, and the failure is rethrown.
 * Failures while resolving the SQL text or creating the session happen before that handler and
 * propagate directly.
 *
 * @param sparkContext Spark context; receives the generated query id under {@code PRESTO_QUERY_ID_CONFIG}
 * @param prestoSparkSession session information used to build the session context
 * @param sqlText inline SQL; must not be combined with {@code sqlLocation}
 * @param sqlLocation location of a SQL file; required when {@code sqlText} is absent
 * @param sqlFileHexHash expected lowercase hex SHA-512 of the SQL file, checked only when reading from {@code sqlLocation}
 * @param sqlFileSizeInBytes expected byte length of the SQL file, checked only when reading from {@code sqlLocation}
 * @param sparkQueueName queue name forwarded to the query info and the execution
 * @param executorFactoryProvider forwarded to the created {@link PrestoSparkQueryExecution}
 * @param queryStatusInfoOutputLocation where to write the status info; also used on immediate failure
 * @param queryDataOutputLocation forwarded to the created {@link PrestoSparkQueryExecution}
 * @return the execution object for the query
 * @throws PrestoException with {@code MALFORMED_QUERY_FILE} when the SQL file size or hash does not match
 */
@Override
public IPrestoSparkQueryExecution create(SparkContext sparkContext, PrestoSparkSession prestoSparkSession, Optional<String> sqlText, Optional<String> sqlLocation, Optional<String> sqlFileHexHash, Optional<String> sqlFileSizeInBytes, Optional<String> sparkQueueName, PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider, Optional<String> queryStatusInfoOutputLocation, Optional<String> queryDataOutputLocation) {
    PrestoSparkConfInitializer.checkInitialized(sparkContext);
    // Resolve the SQL text: exactly one of sqlText / sqlLocation is accepted.
    String sql;
    if (sqlText.isPresent()) {
        checkArgument(!sqlLocation.isPresent(), "sqlText and sqlLocation should not be set at the same time");
        sql = sqlText.get();
    } else {
        checkArgument(sqlLocation.isPresent(), "sqlText or sqlLocation must be present");
        byte[] sqlFileBytes = metadataStorage.read(sqlLocation.get());
        // Optional integrity check: the declared size must equal the number of bytes actually read.
        if (sqlFileSizeInBytes.isPresent()) {
            if (Integer.valueOf(sqlFileSizeInBytes.get()) != sqlFileBytes.length) {
                throw new PrestoException(MALFORMED_QUERY_FILE, format("sql file size %s is different from expected sqlFileSizeInBytes %s", sqlFileBytes.length, sqlFileSizeInBytes.get()));
            }
        }
        // Optional integrity check: the lowercase hex SHA-512 digest of the file must equal the expected hash.
        if (sqlFileHexHash.isPresent()) {
            try {
                MessageDigest md = MessageDigest.getInstance("SHA-512");
                String actualHexHashCode = BaseEncoding.base16().lowerCase().encode(md.digest(sqlFileBytes));
                if (!sqlFileHexHash.get().equals(actualHexHashCode)) {
                    throw new PrestoException(MALFORMED_QUERY_FILE, format("actual hash code %s is different from expected sqlFileHexHash %s", actualHexHashCode, sqlFileHexHash.get()));
                }
            } catch (NoSuchAlgorithmException e) {
                throw new PrestoException(GENERIC_INTERNAL_ERROR, "unsupported hash algorithm", e);
            }
        }
        sql = new String(sqlFileBytes, UTF_8);
    }
    log.info("Query: %s", sql);
    QueryStateTimer queryStateTimer = new QueryStateTimer(systemTicker());
    queryStateTimer.beginPlanning();
    QueryId queryId = queryIdGenerator.createNextQueryId();
    log.info("Starting execution for presto query: %s", queryId);
    System.out.printf("Query id: %s\n", queryId);
    // Record the query id on the Spark configuration under PRESTO_QUERY_ID_CONFIG.
    sparkContext.conf().set(PRESTO_QUERY_ID_CONFIG, queryId.getId());
    SessionContext sessionContext = PrestoSparkSessionContext.createFromSessionInfo(prestoSparkSession, credentialsProviders, authenticatorProviders);
    Session session = sessionSupplier.createSession(queryId, sessionContext);
    session = sessionPropertyDefaults.newSessionWithDefaultProperties(session, Optional.empty(), Optional.empty());
    WarningCollector warningCollector = warningCollectorFactory.create(getWarningHandlingLevel(session));
    // Declared outside the try so the failure handler can report the plan if one was built (null otherwise).
    PlanAndMore planAndMore = null;
    try {
        TransactionId transactionId = transactionManager.beginTransaction(true);
        session = session.beginTransactionId(transactionId, transactionManager, accessControl);
        queryMonitor.queryCreatedEvent(new BasicQueryInfo(createQueryInfo(session, sql, PLANNING, Optional.empty(), sparkQueueName, Optional.empty(), queryStateTimer, Optional.empty(), warningCollector)));
        // including queueing time
        Duration queryMaxRunTime = getQueryMaxRunTime(session);
        // excluding queueing time
        Duration queryMaxExecutionTime = getQueryMaxExecutionTime(session);
        // pick a smaller one as we are not tracking queueing for Presto on Spark
        Duration queryTimeout = queryMaxRunTime.compareTo(queryMaxExecutionTime) < 0 ? queryMaxRunTime : queryMaxExecutionTime;
        // Absolute wall-clock deadline (epoch millis) derived from the chosen timeout.
        long queryCompletionDeadline = System.currentTimeMillis() + queryTimeout.toMillis();
        settingsRequirements.verify(sparkContext, session);
        queryStateTimer.beginAnalyzing();
        PreparedQuery preparedQuery = queryPreparer.prepareQuery(session, sql, warningCollector);
        Optional<QueryType> queryType = StatementUtils.getQueryType(preparedQuery.getStatement().getClass());
        if (queryType.isPresent() && (queryType.get() == QueryType.DATA_DEFINITION)) {
            // Data definition statements skip planning; the task is looked up by the statement's class.
            queryStateTimer.endAnalysis();
            DDLDefinitionTask<?> task = (DDLDefinitionTask<?>) ddlTasks.get(preparedQuery.getStatement().getClass());
            return new PrestoSparkDataDefinitionExecution(task, preparedQuery.getStatement(), transactionManager, accessControl, metadata, session, queryStateTimer, warningCollector);
        } else {
            planAndMore = queryPlanner.createQueryPlan(session, preparedQuery, warningCollector);
            SubPlan fragmentedPlan = planFragmenter.fragmentQueryPlan(session, planAndMore.getPlan(), warningCollector);
            log.info(textDistributedPlan(fragmentedPlan, metadata.getFunctionAndTypeManager(), session, true));
            fragmentedPlan = configureOutputPartitioning(session, fragmentedPlan);
            TableWriteInfo tableWriteInfo = getTableWriteInfo(session, fragmentedPlan);
            JavaSparkContext javaSparkContext = new JavaSparkContext(sparkContext);
            // Accumulators registered on the Spark context and handed to the execution:
            // one for serialized task info, one for shuffle statistics.
            CollectionAccumulator<SerializedTaskInfo> taskInfoCollector = new CollectionAccumulator<>();
            taskInfoCollector.register(sparkContext, Option.empty(), false);
            CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector = new CollectionAccumulator<>();
            shuffleStatsCollector.register(sparkContext, Option.empty(), false);
            TempStorage tempStorage = tempStorageManager.getTempStorage(storageBasedBroadcastJoinStorage);
            queryStateTimer.endAnalysis();
            return new PrestoSparkQueryExecution(javaSparkContext, session, queryMonitor, taskInfoCollector, shuffleStatsCollector, prestoSparkTaskExecutorFactory, executorFactoryProvider, queryStateTimer, warningCollector, sql, planAndMore, fragmentedPlan, sparkQueueName, taskInfoCodec, sparkTaskDescriptorJsonCodec, queryStatusInfoJsonCodec, queryDataJsonCodec, rddFactory, tableWriteInfo, transactionManager, createPagesSerde(blockEncodingManager), executionExceptionFactory, queryTimeout, queryCompletionDeadline, metadataStorage, queryStatusInfoOutputLocation, queryDataOutputLocation, tempStorage, nodeMemoryConfig, waitTimeMetrics);
        }
    } catch (Throwable executionFailure) {
        // Immediate failure path: roll back, report the query as FAILED, then rethrow.
        queryStateTimer.beginFinishing();
        try {
            rollback(session, transactionManager);
        } catch (RuntimeException rollbackFailure) {
            // Only logged, so the original failure is still the one reported and rethrown below.
            log.error(rollbackFailure, "Encountered error when performing rollback");
        }
        queryStateTimer.endQuery();
        Optional<ExecutionFailureInfo> failureInfo = Optional.empty();
        if (executionFailure instanceof PrestoSparkExecutionException) {
            // Failure info is extracted from the exception itself; extraction is required to succeed.
            failureInfo = executionExceptionFactory.extractExecutionFailureInfo((PrestoSparkExecutionException) executionFailure);
            verify(failureInfo.isPresent());
        }
        if (!failureInfo.isPresent()) {
            failureInfo = Optional.of(toFailure(executionFailure));
        }
        try {
            QueryInfo queryInfo = createQueryInfo(session, sql, FAILED, Optional.ofNullable(planAndMore), sparkQueueName, failureInfo, queryStateTimer, Optional.empty(), warningCollector);
            queryMonitor.queryCompletedEvent(queryInfo);
            if (queryStatusInfoOutputLocation.isPresent()) {
                PrestoSparkQueryStatusInfo prestoSparkQueryStatusInfo = createPrestoSparkQueryInfo(queryInfo, Optional.ofNullable(planAndMore), warningCollector, OptionalLong.empty());
                metadataStorage.write(queryStatusInfoOutputLocation.get(), queryStatusInfoJsonCodec.toJsonBytes(prestoSparkQueryStatusInfo));
            }
        } catch (RuntimeException eventFailure) {
            // Reporting problems are logged only; they must not mask the query failure rethrown below.
            log.error(eventFailure, "Error publishing query immediate failure event");
        }
        throw failureInfo.get().toFailure();
    }
}
Also used : PrestoException(com.facebook.presto.spi.PrestoException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) DDLDefinitionTask(com.facebook.presto.execution.DDLDefinitionTask) BasicQueryInfo(com.facebook.presto.server.BasicQueryInfo) QueryInfo(com.facebook.presto.execution.QueryInfo) SerializedTaskInfo(com.facebook.presto.spark.classloader_interface.SerializedTaskInfo) CollectionAccumulator(org.apache.spark.util.CollectionAccumulator) WarningCollector(com.facebook.presto.spi.WarningCollector) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) MessageDigest(java.security.MessageDigest) TableWriteInfo.createTableWriteInfo(com.facebook.presto.execution.scheduler.TableWriteInfo.createTableWriteInfo) TableWriteInfo(com.facebook.presto.execution.scheduler.TableWriteInfo) Optional(java.util.Optional) QueryId(com.facebook.presto.spi.QueryId) BasicQueryInfo(com.facebook.presto.server.BasicQueryInfo) PreparedQuery(com.facebook.presto.execution.QueryPreparer.PreparedQuery) Duration(io.airlift.units.Duration) QueryStateTimer(com.facebook.presto.execution.QueryStateTimer) PrestoSparkDataDefinitionExecution(com.facebook.presto.spark.execution.PrestoSparkDataDefinitionExecution) TransactionId(com.facebook.presto.transaction.TransactionId) PlanAndMore(com.facebook.presto.spark.planner.PrestoSparkQueryPlanner.PlanAndMore) TempStorage(com.facebook.presto.spi.storage.TempStorage) PrestoSparkShuffleStats(com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats) SessionContext(com.facebook.presto.server.SessionContext) QueryType(com.facebook.presto.spi.resourceGroups.QueryType) IPrestoSparkQueryExecution(com.facebook.presto.spark.classloader_interface.IPrestoSparkQueryExecution) SubPlan(com.facebook.presto.sql.planner.SubPlan) StreamingSubPlan(com.facebook.presto.execution.scheduler.StreamingSubPlan) PrestoSparkExecutionException(com.facebook.presto.spark.classloader_interface.PrestoSparkExecutionException) 
Session(com.facebook.presto.Session) ConnectorSession(com.facebook.presto.spi.ConnectorSession) PrestoSparkSession(com.facebook.presto.spark.classloader_interface.PrestoSparkSession) PrestoSparkSessionProperties.getSparkBroadcastJoinMaxMemoryOverride(com.facebook.presto.spark.PrestoSparkSessionProperties.getSparkBroadcastJoinMaxMemoryOverride)

Aggregations

PrestoSparkShuffleStats (com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats)2 Session (com.facebook.presto.Session)1 DDLDefinitionTask (com.facebook.presto.execution.DDLDefinitionTask)1 QueryInfo (com.facebook.presto.execution.QueryInfo)1 PreparedQuery (com.facebook.presto.execution.QueryPreparer.PreparedQuery)1 QueryStateTimer (com.facebook.presto.execution.QueryStateTimer)1 StreamingSubPlan (com.facebook.presto.execution.scheduler.StreamingSubPlan)1 TableWriteInfo (com.facebook.presto.execution.scheduler.TableWriteInfo)1 TableWriteInfo.createTableWriteInfo (com.facebook.presto.execution.scheduler.TableWriteInfo.createTableWriteInfo)1 BasicQueryInfo (com.facebook.presto.server.BasicQueryInfo)1 SessionContext (com.facebook.presto.server.SessionContext)1 PrestoSparkSessionProperties.getSparkBroadcastJoinMaxMemoryOverride (com.facebook.presto.spark.PrestoSparkSessionProperties.getSparkBroadcastJoinMaxMemoryOverride)1 IPrestoSparkQueryExecution (com.facebook.presto.spark.classloader_interface.IPrestoSparkQueryExecution)1 PrestoSparkExecutionException (com.facebook.presto.spark.classloader_interface.PrestoSparkExecutionException)1 PrestoSparkMutableRow (com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow)1 PrestoSparkSession (com.facebook.presto.spark.classloader_interface.PrestoSparkSession)1 SerializedTaskInfo (com.facebook.presto.spark.classloader_interface.SerializedTaskInfo)1 PrestoSparkDataDefinitionExecution (com.facebook.presto.spark.execution.PrestoSparkDataDefinitionExecution)1 PlanAndMore (com.facebook.presto.spark.planner.PrestoSparkQueryPlanner.PlanAndMore)1 ConnectorSession (com.facebook.presto.spi.ConnectorSession)1