Search in sources:

Example 1 with Distribution

use of com.facebook.airlift.stats.Distribution in project presto by prestodb.

The class PrestoSparkQueryExecutionFactory, method createStageInfo.

/**
 * Builds the {@link StageInfo} for the fragment at the root of {@code plan} and, recursively,
 * for every child sub-plan.
 *
 * <p>The stage is always reported as {@code FINISHED} with a single execution attempt. The user
 * memory figure is the sum of the per-task user memory reservations; the node total memory figure
 * is the maximum of the per-task peak node total memory values.
 *
 * @param queryId the query the stage belongs to
 * @param plan the sub-plan whose root fragment defines this stage
 * @param taskInfoMap task infos keyed by the plan fragment that produced them
 * @return the stage info for {@code plan}, with sub-stages mirroring {@code plan.getChildren()}
 */
private static StageInfo createStageInfo(QueryId queryId, SubPlan plan, ListMultimap<PlanFragmentId, TaskInfo> taskInfoMap) {
    PlanFragmentId fragmentId = plan.getFragment().getId();
    StageId stageId = new StageId(queryId, fragmentId.getId());
    List<TaskInfo> stageTasks = taskInfoMap.get(fragmentId);

    // Aggregate memory figures over every task that ran for this fragment.
    long userMemoryTotal = 0;
    long nodeTotalMemoryPeak = 0;
    for (TaskInfo task : stageTasks) {
        userMemoryTotal += task.getStats().getUserMemoryReservationInBytes();
        nodeTotalMemoryPeak = max(task.getStats().getPeakNodeTotalMemoryInBytes(), nodeTotalMemoryPeak);
    }

    StageExecutionInfo stageExecutionInfo = StageExecutionInfo.create(
            new StageExecutionId(stageId, 0),
            // TODO: figure out a way to know what exactly stage has caused a failure
            StageExecutionState.FINISHED,
            Optional.empty(),
            stageTasks,
            DateTime.now(),
            new Distribution().snapshot(),
            succinctBytes(userMemoryTotal),
            succinctBytes(nodeTotalMemoryPeak),
            1,
            1);

    // The stage has no real HTTP endpoint, so a placeholder URI is used.
    URI stageUri = URI.create("http://fake.invalid/stage/" + stageId);
    return new StageInfo(
            stageId,
            stageUri,
            Optional.of(plan.getFragment()),
            stageExecutionInfo,
            ImmutableList.of(),
            plan.getChildren().stream()
                    .map(child -> createStageInfo(queryId, child, taskInfoMap))
                    .collect(toImmutableList()),
            false);
}
Also used : TaskInfo(com.facebook.presto.execution.TaskInfo) SerializedTaskInfo(com.facebook.presto.spark.classloader_interface.SerializedTaskInfo) StageInfo(com.facebook.presto.execution.StageInfo) StageId(com.facebook.presto.execution.StageId) Distribution(com.facebook.airlift.stats.Distribution) StageExecutionInfo(com.facebook.presto.execution.StageExecutionInfo) PlanFragmentId(com.facebook.presto.sql.planner.plan.PlanFragmentId) StageExecutionId(com.facebook.presto.execution.StageExecutionId)

Example 2 with Distribution

use of com.facebook.airlift.stats.Distribution in project presto by prestodb.

The class TestOrcBatchPageSourceMemoryTracking, method testMaxReadBytes.

/**
 * Checks that pages produced by the ORC batch page source respect the configured
 * {@code OrcMaxReadBlockSize}, and that the max-combined-bytes-per-row statistic is recorded.
 *
 * <p>The table is built from columns whose values grow with every row, so the reader has to
 * shrink its batch size to stay under the byte limit.
 *
 * <p>The page source is closed and the temporary file deleted even when an assertion or a read
 * fails, so a failing run does not leak the reader or leave the file behind.
 *
 * @param rowCount number of rows to write to (and read back from) the temporary ORC file
 * @throws Exception if writing, reading or closing the ORC file fails
 */
@Test(dataProvider = "rowCount")
public void testMaxReadBytes(int rowCount) throws Exception {
    int maxReadBytes = 1_000;
    HiveClientConfig config = new HiveClientConfig();
    config.setOrcMaxReadBlockSize(new DataSize(maxReadBytes, BYTE));
    ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    // Build a table where every row gets larger, so we can test that the "batchSize" reduces
    int numColumns = 5;
    int step = 250;
    ImmutableList.Builder<TestColumn> columnBuilder = ImmutableList.<TestColumn>builder().add(new TestColumn("p_empty_string", javaStringObjectInspector, () -> "", true));
    GrowingTestColumn[] dataColumns = new GrowingTestColumn[numColumns];
    for (int i = 0; i < numColumns; i++) {
        dataColumns[i] = new GrowingTestColumn("p_string", javaStringObjectInspector, () -> Long.toHexString(random.nextLong()), false, step * (i + 1));
        columnBuilder.add(dataColumns[i]);
    }
    List<TestColumn> testColumns = columnBuilder.build();
    File tempFile = File.createTempFile("presto_test_orc_page_source_max_read_bytes", "orc");
    // only the unique path is needed; the file itself is removed before the preparer runs
    tempFile.delete();
    try {
        // set-up happens inside the try so the temp file is removed even if preparation fails
        TestPreparer testPreparer = new TestPreparer(tempFile.getAbsolutePath(), testColumns, rowCount, rowCount);
        ConnectorPageSource pageSource = testPreparer.newPageSource(stats, session);
        try {
            int positionCount = 0;
            while (true) {
                Page page = pageSource.getNextPage();
                if (pageSource.isFinished()) {
                    break;
                }
                assertNotNull(page);
                page = page.getLoadedPage();
                positionCount += page.getPositionCount();
                // ignore the first MAX_BATCH_SIZE rows given the sizes are set when loading the blocks
                if (positionCount > MAX_BATCH_SIZE) {
                    // either the block is bounded by maxReadBytes or we just load one single large block
                    // an error margin MAX_BATCH_SIZE / step is needed given the block sizes are increasing
                    assertTrue(page.getSizeInBytes() < maxReadBytes * (MAX_BATCH_SIZE / step) || 1 == page.getPositionCount());
                }
            }
            // verify the stats are correctly recorded
            Distribution distribution = stats.getMaxCombinedBytesPerRow().getAllTime();
            assertEquals((int) distribution.getCount(), 1);
            // the block is VariableWidthBlock that contains valueIsNull and offsets arrays as overhead
            assertEquals((int) distribution.getMax(), Arrays.stream(dataColumns).mapToInt(GrowingTestColumn::getMaxSize).sum() + (Integer.BYTES + Byte.BYTES) * numColumns);
        } finally {
            // close on every path: a failed assertion above must not leak the page source
            pageSource.close();
        }
    } finally {
        tempFile.delete();
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Page(com.facebook.presto.common.Page) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) DataSize(io.airlift.units.DataSize) Distribution(com.facebook.airlift.stats.Distribution) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) CacheConfig(com.facebook.presto.cache.CacheConfig) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) Test(org.testng.annotations.Test)

Example 3 with Distribution

use of com.facebook.airlift.stats.Distribution in project presto by prestodb.

The class QueryMonitor, method computeStageStatistics.

/**
 * Appends a {@link StageStatistics} entry for {@code stageInfo} to the builder and then does the
 * same, depth first, for each of its sub-stages.
 *
 * <p>Statistics are taken from the stage's latest execution attempt. Per-task CPU time (converted
 * from nanoseconds to milliseconds) and per-task peak total memory are summarised as distributions.
 *
 * @param stageInfo the stage to report on
 * @param stageStatisticsBuilder accumulator receiving one entry per visited stage
 */
private static void computeStageStatistics(StageInfo stageInfo, ImmutableList.Builder<StageStatistics> stageStatisticsBuilder) {
    StageExecutionInfo latestAttempt = stageInfo.getLatestAttemptExecutionInfo();

    Distribution cpuMillisPerTask = new Distribution();
    Distribution peakMemoryPerTask = new Distribution();
    latestAttempt.getTasks().forEach(task -> {
        cpuMillisPerTask.add(NANOSECONDS.toMillis(task.getStats().getTotalCpuTimeInNanos()));
        peakMemoryPerTask.add(task.getStats().getPeakTotalMemoryInBytes());
    });

    StageStatistics stageStatistics = new StageStatistics(
            stageInfo.getStageId().getId(),
            latestAttempt.getStats().getGcInfo().getStageExecutionId(),
            latestAttempt.getTasks().size(),
            latestAttempt.getStats().getTotalScheduledTime(),
            latestAttempt.getStats().getTotalCpuTime(),
            latestAttempt.getStats().getRetriedCpuTime(),
            latestAttempt.getStats().getTotalBlockedTime(),
            latestAttempt.getStats().getRawInputDataSize(),
            latestAttempt.getStats().getProcessedInputDataSize(),
            latestAttempt.getStats().getPhysicalWrittenDataSize(),
            latestAttempt.getStats().getGcInfo(),
            createResourceDistribution(cpuMillisPerTask.snapshot()),
            createResourceDistribution(peakMemoryPerTask.snapshot()));
    stageStatisticsBuilder.add(stageStatistics);

    // Parent first, then each sub-stage in order.
    for (StageInfo subStage : stageInfo.getSubStages()) {
        computeStageStatistics(subStage, stageStatisticsBuilder);
    }
}
Also used : TaskInfo(com.facebook.presto.execution.TaskInfo) ResourceDistribution(com.facebook.presto.spi.eventlistener.ResourceDistribution) Distribution(com.facebook.airlift.stats.Distribution) StageExecutionInfo(com.facebook.presto.execution.StageExecutionInfo) StageStatistics(com.facebook.presto.spi.eventlistener.StageStatistics)

Example 4 with Distribution

use of com.facebook.airlift.stats.Distribution in project presto by prestodb.

The class PipelineContext, method getPipelineStats.

/**
 * Builds a {@link PipelineStats} snapshot for this pipeline.
 *
 * <p>The totals held in this context's fields are used as the starting values, and the stats of
 * every driver context currently present in {@code drivers} are added on top of them. Operator
 * stats reported by those drivers are merged, per operator id, into a copy of
 * {@code operatorSummaries}.
 *
 * @return the combined pipeline statistics, including the per-driver stats that were folded in
 */
public PipelineStats getPipelineStats() {
    // check for end state to avoid callback ordering problems
    if (taskContext.getState().isDone()) {
        // fill in any timestamp that is still null; values that are already set are left untouched
        DateTime now = DateTime.now();
        executionStartTime.compareAndSet(null, now);
        lastExecutionStartTime.compareAndSet(null, now);
        lastExecutionEndTime.compareAndSet(null, now);
    }
    // NOTE(review): completedDrivers is read before the driver list is copied; this ordering
    // looks deliberate for consistency of the snapshot — confirm before reordering
    int completedDrivers = this.completedDrivers.get();
    List<DriverContext> driverContexts = ImmutableList.copyOf(this.drivers);
    int totalSplits = this.totalSplits.get();
    PipelineStatusBuilder pipelineStatusBuilder = new PipelineStatusBuilder(totalSplits, completedDrivers, getActivePartitionedSplitsWeight(), partitioned);
    int totalDrivers = completedDrivers + driverContexts.size();
    // NOTE(review): presumably copy constructors, so the per-driver values added below do not
    // modify the distributions held in the fields — confirm against Distribution
    Distribution queuedTime = new Distribution(this.queuedTime);
    Distribution elapsedTime = new Distribution(this.elapsedTime);
    // local totals start from the values held in this context's fields
    long totalScheduledTime = this.totalScheduledTime.get();
    long totalCpuTime = this.totalCpuTime.get();
    long totalBlockedTime = this.totalBlockedTime.get();
    long totalAllocation = this.totalAllocation.get();
    long rawInputDataSize = this.rawInputDataSize.getTotalCount();
    long rawInputPositions = this.rawInputPositions.getTotalCount();
    long processedInputDataSize = this.processedInputDataSize.getTotalCount();
    long processedInputPositions = this.processedInputPositions.getTotalCount();
    long outputDataSize = this.outputDataSize.getTotalCount();
    long outputPositions = this.outputPositions.getTotalCount();
    long physicalWrittenDataSize = this.physicalWrittenDataSize.get();
    ImmutableSet.Builder<BlockedReason> blockedReasons = ImmutableSet.builder();
    boolean hasUnfinishedDrivers = false;
    boolean unfinishedDriversFullyBlocked = true;
    // work on a copy so the merge below does not modify the field
    TreeMap<Integer, OperatorStats> operatorSummaries = new TreeMap<>(this.operatorSummaries);
    ListMultimap<Integer, OperatorStats> runningOperators = ArrayListMultimap.create();
    ImmutableList.Builder<DriverStats> drivers = ImmutableList.builderWithExpectedSize(driverContexts.size());
    // fold the stats of every driver in the copied list into the local totals
    for (DriverContext driverContext : driverContexts) {
        DriverStats driverStats = driverContext.getDriverStats();
        drivers.add(driverStats);
        pipelineStatusBuilder.accumulate(driverStats, driverContext.getSplitWeight());
        if (driverStats.getStartTime() != null && driverStats.getEndTime() == null) {
            // driver has started running, but not yet completed
            hasUnfinishedDrivers = true;
            unfinishedDriversFullyBlocked &= driverStats.isFullyBlocked();
            blockedReasons.addAll(driverStats.getBlockedReasons());
        }
        // durations are accumulated in nanoseconds, data sizes in bytes
        queuedTime.add(driverStats.getQueuedTime().roundTo(NANOSECONDS));
        elapsedTime.add(driverStats.getElapsedTime().roundTo(NANOSECONDS));
        totalScheduledTime += driverStats.getTotalScheduledTime().roundTo(NANOSECONDS);
        totalCpuTime += driverStats.getTotalCpuTime().roundTo(NANOSECONDS);
        totalBlockedTime += driverStats.getTotalBlockedTime().roundTo(NANOSECONDS);
        totalAllocation += driverStats.getTotalAllocation().toBytes();
        // group this driver's operator stats by operator id for the merge below
        for (OperatorStats operatorStats : driverStats.getOperatorStats()) {
            runningOperators.put(operatorStats.getOperatorId(), operatorStats);
        }
        rawInputDataSize += driverStats.getRawInputDataSize().toBytes();
        rawInputPositions += driverStats.getRawInputPositions();
        processedInputDataSize += driverStats.getProcessedInputDataSize().toBytes();
        processedInputPositions += driverStats.getProcessedInputPositions();
        outputDataSize += driverStats.getOutputDataSize().toBytes();
        outputPositions += driverStats.getOutputPositions();
        physicalWrittenDataSize += driverStats.getPhysicalWrittenDataSize().toBytes();
    }
    // merge the running operator stats into the operator summary
    for (Integer operatorId : runningOperators.keySet()) {
        List<OperatorStats> runningStats = runningOperators.get(operatorId);
        if (runningStats.isEmpty()) {
            continue;
        }
        OperatorStats current = operatorSummaries.get(operatorId);
        OperatorStats combined;
        if (current != null) {
            // a summary already exists for this operator: add all running stats to it
            combined = current.add(runningStats);
        } else {
            // no summary yet: start from the first running entry and add the remaining ones
            combined = runningStats.get(0);
            if (runningStats.size() > 1) {
                combined = combined.add(runningStats.subList(1, runningStats.size()));
            }
        }
        operatorSummaries.put(operatorId, combined);
    }
    PipelineStatus pipelineStatus = pipelineStatusBuilder.build();
    // fully blocked only if at least one driver is unfinished and every unfinished driver reports fully blocked
    boolean fullyBlocked = hasUnfinishedDrivers && unfinishedDriversFullyBlocked;
    return new PipelineStats(pipelineId, executionStartTime.get(), lastExecutionStartTime.get(), lastExecutionEndTime.get(), inputPipeline, outputPipeline, totalDrivers, pipelineStatus.getQueuedDrivers(), pipelineStatus.getQueuedPartitionedDrivers(), pipelineStatus.getQueuedPartitionedSplitsWeight(), pipelineStatus.getRunningDrivers(), pipelineStatus.getRunningPartitionedDrivers(), pipelineStatus.getRunningPartitionedSplitsWeight(), pipelineStatus.getBlockedDrivers(), completedDrivers, pipelineMemoryContext.getUserMemory(), pipelineMemoryContext.getRevocableMemory(), pipelineMemoryContext.getSystemMemory(), queuedTime.snapshot(), elapsedTime.snapshot(), totalScheduledTime, totalCpuTime, totalBlockedTime, fullyBlocked, blockedReasons.build(), totalAllocation, rawInputDataSize, rawInputPositions, processedInputDataSize, processedInputPositions, outputDataSize, outputPositions, physicalWrittenDataSize, ImmutableList.copyOf(operatorSummaries.values()), drivers.build());
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) TreeMap(java.util.TreeMap) DateTime(org.joda.time.DateTime) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ImmutableSet(com.google.common.collect.ImmutableSet) Distribution(com.facebook.airlift.stats.Distribution)

Aggregations

Distribution (com.facebook.airlift.stats.Distribution)4 StageExecutionInfo (com.facebook.presto.execution.StageExecutionInfo)2 TaskInfo (com.facebook.presto.execution.TaskInfo)2 ImmutableList (com.google.common.collect.ImmutableList)2 CacheConfig (com.facebook.presto.cache.CacheConfig)1 Page (com.facebook.presto.common.Page)1 StageExecutionId (com.facebook.presto.execution.StageExecutionId)1 StageId (com.facebook.presto.execution.StageId)1 StageInfo (com.facebook.presto.execution.StageInfo)1 SerializedTaskInfo (com.facebook.presto.spark.classloader_interface.SerializedTaskInfo)1 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)1 ConnectorSession (com.facebook.presto.spi.ConnectorSession)1 ResourceDistribution (com.facebook.presto.spi.eventlistener.ResourceDistribution)1 StageStatistics (com.facebook.presto.spi.eventlistener.StageStatistics)1 PlanFragmentId (com.facebook.presto.sql.planner.plan.PlanFragmentId)1 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 DataSize (io.airlift.units.DataSize)1 File (java.io.File)1 TreeMap (java.util.TreeMap)1