Search in sources:

Example 11 with CounterStat

use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.

the class TestHiveSplitSource method testPreloadSplitsForRewindableSplitSource.

/**
 * Checks that a rewindable bucketed split source does not complete a pending
 * split request until {@code noMoreSplits()} has been called, and that the
 * queued splits are served by a later request.
 */
@Test
public void testPreloadSplitsForRewindableSplitSource() throws Exception {
    HiveSplitSource source = HiveSplitSource.bucketedRewindable(
            SESSION,
            "database",
            "table",
            new CacheQuotaRequirement(GLOBAL, Optional.empty()),
            10,
            new DataSize(1, MEGABYTE),
            new TestingHiveSplitLoader(),
            EXECUTOR,
            new CounterStat());

    // Queue ten splits for bucket 0; the buffered count must grow by one per split.
    int queued = 0;
    while (queued < 10) {
        source.addToQueue(new TestSplit(queued, OptionalInt.of(0)));
        queued++;
        assertEquals(source.getBufferedInternalSplitCount(), queued);
    }

    SettableFuture<List<ConnectorSplit>> firstBatch = SettableFuture.create();
    CountDownLatch fetcherRunning = new CountDownLatch(1);

    // Background task that requests splits and reports the outcome through the future.
    Runnable fetchTask = () -> {
        try {
            fetcherRunning.countDown();
            firstBatch.set(getSplits(source, OptionalInt.of(0), 10));
        } catch (Throwable failure) {
            firstBatch.setException(failure);
        }
    };
    Thread fetcher = new Thread(fetchTask);
    fetcher.start();

    try {
        // The fetcher must have started before its progress can be observed.
        assertTrue(fetcherRunning.await(10, SECONDS));

        // The request must still be pending while noMoreSplits has not been signalled.
        MILLISECONDS.sleep(200);
        assertFalse(firstBatch.isDone());

        // Signalling noMoreSplits lets the pending request complete.
        source.noMoreSplits();
        List<ConnectorSplit> fetched = firstBatch.get(10, SECONDS);
        assertEquals(fetched.size(), 0);
        assertFalse(source.isFinished());

        // A follow-up request returns the queued splits in insertion order.
        fetched = getSplits(source, OptionalInt.of(0), 10);
        for (int position = 0; position < 10; position++) {
            HiveSplit split = (HiveSplit) fetched.get(position);
            assertEquals(split.getPartitionDataColumnCount(), position);
        }
        assertTrue(source.isFinished());
    } finally {
        fetcher.interrupt();
    }
}
Also used : CounterStat(com.facebook.airlift.stats.CounterStat) CountDownLatch(java.util.concurrent.CountDownLatch) DataSize(io.airlift.units.DataSize) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) Test(org.testng.annotations.Test)

Example 12 with CounterStat

use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.

the class HiveSplitManager method getSplits.

/**
 * Creates the split source used to read the Hive table described by {@code layoutHandle}.
 *
 * <p>The table is resolved through the metastore bound to {@code transaction}, the layout's
 * partitions are sorted in descending partition-id order, and a {@code BackgroundHiveSplitLoader}
 * is started against a {@code HiveSplitSource} selected by the split scheduling strategy.
 *
 * @param transaction handle used to look up the transactional metadata
 * @param session session supplying identity, query id and session properties
 * @param layoutHandle table layout; cast to {@code HiveTableLayoutHandle}
 * @param splitSchedulingContext supplies the scheduling strategy and warning collector
 * @return an empty {@code FixedSplitSource} when the layout has no partitions, otherwise the
 *         {@code HiveSplitSource} that the background loader was started with
 * @throws PrestoException if the transaction is unknown, the layout carries no partition list,
 *         grouped scheduling is requested without a bucket handle, or the read bucket count
 *         exceeds the table bucket count
 * @throws TableNotFoundException if the metastore does not return the table
 * @throws HiveNotReadableException if the table is marked as not readable and offline data
 *         debug mode is disabled
 * @throws IllegalArgumentException if the split scheduling strategy is not handled
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableLayoutHandle layoutHandle, SplitSchedulingContext splitSchedulingContext) {
    HiveTableLayoutHandle layout = (HiveTableLayoutHandle) layoutHandle;
    SchemaTableName tableName = layout.getSchemaTableName();
    // get table metadata
    TransactionalMetadata metadata = hiveTransactionManager.get(transaction);
    if (metadata == null) {
        throw new PrestoException(HIVE_TRANSACTION_NOT_FOUND, format("Transaction not found: %s", transaction));
    }
    SemiTransactionalHiveMetastore metastore = metadata.getMetastore();
    Table table = metastore.getTable(new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), metastore.getColumnConverterProvider()), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    if (!isOfflineDataDebugModeEnabled(session)) {
        // verify table is not marked as non-readable
        String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
        if (!isNullOrEmpty(tableNotReadable)) {
            throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
        }
    }
    // get partitions
    List<HivePartition> partitions = layout.getPartitions().orElseThrow(() -> new PrestoException(GENERIC_INTERNAL_ERROR, "Layout does not contain partitions"));
    // short circuit if we don't have any partitions
    HivePartition partition = Iterables.getFirst(partitions, null);
    if (partition == null) {
        return new FixedSplitSource(ImmutableList.of());
    }
    Optional<HiveBucketFilter> bucketFilter = layout.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = layout.getBucketHandle();
    if ((splitSchedulingContext.getSplitSchedulingStrategy() == GROUPED_SCHEDULING) && !bucketHandle.isPresent()) {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    if (bucketHandle.isPresent()) {
        HiveBucketHandle handle = bucketHandle.get();
        if (handle.getReadBucketCount() > handle.getTableBucketCount()) {
            // The message carries %s placeholders, so it must be formatted with the actual counts.
            throw new PrestoException(
                    GENERIC_INTERNAL_ERROR,
                    format(
                            "readBucketCount (%s) is greater than the tableBucketCount (%s) which generally points to an issue in plan generation",
                            handle.getReadBucketCount(),
                            handle.getTableBucketCount()));
        }
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(metastore, table, tableName, partitions, bucketHandle, session, splitSchedulingContext.getWarningCollector(), layout.getRequestedColumns(), layout.getPredicateColumns(), layout.getDomainPredicate().getDomains());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hivePartitions,
            getPathDomain(layout.getDomainPredicate(), layout.getPredicateColumns()),
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            // Avoid over-committing split loader concurrency
            min(splitLoaderConcurrency, partitions.size()),
            recursiveDfsWalkerEnabled,
            splitSchedulingContext.schedulerUsesHostAddresses(),
            layout.isPartialAggregationsPushedDown());
    HiveSplitSource splitSource;
    CacheQuotaRequirement cacheQuotaRequirement = cacheQuotaRequirementProvider.getCacheQuotaRequirement(table.getDatabaseName(), table.getTableName());
    // pick the split source flavour that matches the scheduling strategy
    switch(splitSchedulingContext.getSplitSchedulingStrategy()) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplits, maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplits, maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        case REWINDABLE_GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketedRewindable(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingContext.getSplitSchedulingStrategy());
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
Also used : SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) CounterStat(com.facebook.airlift.stats.CounterStat) PrestoException(com.facebook.presto.spi.PrestoException) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) Table(com.facebook.presto.hive.metastore.Table) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) SchemaTableName(com.facebook.presto.spi.SchemaTableName) HiveBucketFilter(com.facebook.presto.hive.HiveBucketing.HiveBucketFilter)

Example 13 with CounterStat

use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.

the class TestMemoryRevokingScheduler method newSqlTask.

/**
 * Builds a {@code SqlTask} for {@code queryId} at a fake location, using the query
 * context returned by {@code getOrCreateQueryContext} for the given memory pool.
 */
private SqlTask newSqlTask(QueryId queryId, MemoryPool memoryPool) {
    QueryContext context = getOrCreateQueryContext(queryId, memoryPool);
    // Each task takes the next value from the id generator as the last TaskId component.
    TaskId id = new TaskId(queryId.getId(), 0, 0, idGeneator.incrementAndGet());
    URI taskLocation = URI.create("fake://task/" + id);
    return createSqlTask(
            id,
            taskLocation,
            "fake",
            context,
            sqlTaskExecutionFactory,
            new MockExchangeClientSupplier(),
            singleThreadedExecutor,
            Functions.identity(),
            new DataSize(32, MEGABYTE),
            new CounterStat(),
            new SpoolingOutputBufferFactory(new FeaturesConfig()));
}
Also used : MockExchangeClientSupplier(com.facebook.presto.execution.TestSqlTaskManager.MockExchangeClientSupplier) CounterStat(com.facebook.airlift.stats.CounterStat) FeaturesConfig(com.facebook.presto.sql.analyzer.FeaturesConfig) DataSize(io.airlift.units.DataSize) QueryContext(com.facebook.presto.memory.QueryContext) SpoolingOutputBufferFactory(com.facebook.presto.execution.buffer.SpoolingOutputBufferFactory) URI(java.net.URI)

Example 14 with CounterStat

use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.

the class TestHiveSplitManager method assertRedundantColumnDomains.

/**
 * Runs {@code HiveSplitManager.getSplits} for a single-partition test table whose partition
 * carries {@code partitionStatistics}, with a query predicate of {@code predicateRange} on
 * {@code columnHandle}, and asserts the redundant column domains reported by the returned splits.
 *
 * @param predicateRange range used as the query predicate on {@code columnHandle}
 * @param partitionStatistics statistics attached to the test partition
 * @param expectedRedundantColumnDomains expected redundant column domains, one entry per split
 * @param columnHandle column the predicate applies to
 */
private void assertRedundantColumnDomains(Range predicateRange, PartitionStatistics partitionStatistics, List<Set<ColumnHandle>> expectedRedundantColumnDomains, HiveColumnHandle columnHandle) throws Exception {
    // Query predicate: a single non-null domain on the given column.
    Domain columnDomain = Domain.create(SortedRangeSet.copyOf(predicateRange.getType(), ImmutableList.of(predicateRange)), false);
    TupleDomain<ColumnHandle> predicate = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(new ColumnDomain<>(columnHandle, columnDomain))));

    // Partition carrying the supplied statistics.
    Storage storage = new Storage(fromHiveStorageFormat(ORC), "location", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    Partition metastorePartition = new Partition("test_db", "test_table", ImmutableList.of(PARTITION_VALUE), storage, COLUMNS, ImmutableMap.of(), Optional.empty(), false, true, 0);
    PartitionWithStatistics partitionWithStats = new PartitionWithStatistics(metastorePartition, PARTITION_NAME, partitionStatistics);

    HiveClientConfig config = new HiveClientConfig().setPartitionStatisticsBasedOptimizationEnabled(true);
    HdfsEnvironment hdfs = new HdfsEnvironment(
            new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config, new MetastoreClientConfig()), ImmutableSet.of()),
            new MetastoreClientConfig(),
            new NoHdfsAuthentication());

    HiveMetadataFactory metadataFactory = new HiveMetadataFactory(
            new TestingExtendedHiveMetastore(TEST_TABLE, partitionWithStats),
            hdfs,
            new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, config),
            DateTimeZone.forOffsetHours(1),
            true,
            false,
            false,
            false,
            true,
            true,
            config.getMaxPartitionBatchSize(),
            config.getMaxPartitionsPerScan(),
            false,
            FUNCTION_AND_TYPE_MANAGER,
            new HiveLocationService(hdfs),
            FUNCTION_RESOLUTION,
            ROW_EXPRESSION_SERVICE,
            FILTER_STATS_CALCULATOR_SERVICE,
            new TableParameterCodec(),
            HiveTestUtils.PARTITION_UPDATE_CODEC,
            HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC,
            executor,
            new HiveTypeTranslator(),
            new HiveStagingFileCommitter(hdfs, executor),
            new HiveZeroRowFileCreator(hdfs, new OutputStreamDataSinkFactory(), executor),
            TEST_SERVER_VERSION,
            new HivePartitionObjectBuilder(),
            new HiveEncryptionInformationProvider(ImmutableList.of()),
            new HivePartitionStats(),
            new HiveFileRenamer(),
            HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);

    HiveSplitManager manager = new HiveSplitManager(
            new TestingHiveTransactionManager(metadataFactory),
            new NamenodeStats(),
            hdfs,
            new TestingDirectoryLister(),
            directExecutor(),
            new HiveCoercionPolicy(FUNCTION_AND_TYPE_MANAGER),
            new CounterStat(),
            100,
            config.getMaxOutstandingSplitsSize(),
            config.getMinPartitionBatchSize(),
            config.getMaxPartitionBatchSize(),
            config.getSplitLoaderConcurrency(),
            false,
            new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()),
            new HiveEncryptionInformationProvider(ImmutableList.of()));

    // Single "ds" partition key column and the one partition it selects.
    HiveColumnHandle dsColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(VARCHAR), MAX_PARTITION_KEY_COLUMN_INDEX, PARTITION_KEY, Optional.empty(), Optional.empty());
    List<HivePartition> hivePartitions = ImmutableList.of(new HivePartition(new SchemaTableName("test_schema", "test_table"), PARTITION_NAME, ImmutableMap.of(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice(PARTITION_VALUE)))));

    // The same predicate re-keyed by subfield, as the layout handle expects.
    TupleDomain<Subfield> subfieldPredicate = predicate
            .transform(HiveColumnHandle.class::cast)
            .transform(column -> new Subfield(column.getName(), ImmutableList.of()));

    TestingConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    HiveTableLayoutHandle layoutHandle = new HiveTableLayoutHandle(
            new SchemaTableName("test_schema", "test_table"),
            "test_path",
            ImmutableList.of(dsColumn),
            COLUMNS,
            ImmutableMap.of(),
            hivePartitions,
            subfieldPredicate,
            TRUE_CONSTANT,
            ImmutableMap.of(dsColumn.getName(), dsColumn, columnHandle.getName(), columnHandle),
            predicate,
            Optional.empty(),
            Optional.empty(),
            false,
            "layout",
            Optional.empty(),
            false);

    ConnectorSplitSource source = manager.getSplits(new HiveTransactionHandle(), session, layoutHandle, SPLIT_SCHEDULING_CONTEXT);

    // Collect the redundant column domains from the first batch of up to 100 splits.
    List<Set<ColumnHandle>> observed = source.getNextBatch(NOT_PARTITIONED, 100)
            .get()
            .getSplits()
            .stream()
            .map(HiveSplit.class::cast)
            .map(HiveSplit::getRedundantColumnDomains)
            .collect(toImmutableList());
    assertEquals(observed, expectedRedundantColumnDomains);
}
Also used : CounterStat(com.facebook.airlift.stats.CounterStat) Subfield(com.facebook.presto.common.Subfield) ColumnHandle(com.facebook.presto.spi.ColumnHandle) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Storage(com.facebook.presto.hive.metastore.Storage) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) Set(java.util.Set) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) ImmutableSet(com.google.common.collect.ImmutableSet) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) CacheConfig(com.facebook.presto.cache.CacheConfig) Partition(com.facebook.presto.hive.metastore.Partition) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics)

Example 15 with CounterStat

use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.

the class TestHiveSplitSource method testRewindOneBucket.

/**
 * Checks that rewinding bucket 0 restores the buffered split count to ten whether
 * none, some, or all of the bucket's splits have been retrieved.
 */
@Test
public void testRewindOneBucket() {
    HiveSplitSource source = HiveSplitSource.bucketedRewindable(
            SESSION,
            "database",
            "table",
            new CacheQuotaRequirement(GLOBAL, Optional.empty()),
            10,
            new DataSize(1, MEGABYTE),
            new TestingHiveSplitLoader(),
            EXECUTOR,
            new CounterStat());

    // Queue ten splits for bucket 0, checking the buffered count after each one.
    int queued = 0;
    while (queued < 10) {
        source.addToQueue(new TestSplit(queued, OptionalInt.of(0)));
        queued++;
        assertEquals(source.getBufferedInternalSplitCount(), queued);
    }
    source.noMoreSplits();

    // Case 1: rewind before any split has been handed out.
    source.rewind(new HivePartitionHandle(0));
    assertEquals(source.getBufferedInternalSplitCount(), 10);

    // Case 2: rewind after half of the splits have been handed out.
    assertEquals(getSplits(source, OptionalInt.of(0), 5).size(), 5);
    assertEquals(source.getBufferedInternalSplitCount(), 5);
    source.rewind(new HivePartitionHandle(0));
    assertEquals(source.getBufferedInternalSplitCount(), 10);

    // Case 3: rewind after every split has been handed out.
    assertEquals(getSplits(source, OptionalInt.of(0), 10).size(), 10);
    assertEquals(source.getBufferedInternalSplitCount(), 0);
    source.rewind(new HivePartitionHandle(0));
    assertEquals(source.getBufferedInternalSplitCount(), 10);
}
Also used : CounterStat(com.facebook.airlift.stats.CounterStat) DataSize(io.airlift.units.DataSize) Test(org.testng.annotations.Test)

Aggregations

CounterStat (com.facebook.airlift.stats.CounterStat)20 DataSize (io.airlift.units.DataSize)12 Test (org.testng.annotations.Test)10 CacheConfig (com.facebook.presto.cache.CacheConfig)3 NoHdfsAuthentication (com.facebook.presto.hive.authentication.NoHdfsAuthentication)3 OutputStreamDataSinkFactory (com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory)3 SchemaTableName (com.facebook.presto.spi.SchemaTableName)3 FeaturesConfig (com.facebook.presto.sql.analyzer.FeaturesConfig)3 GroupByHashPageIndexerFactory (com.facebook.presto.GroupByHashPageIndexerFactory)2 MockExchangeClientSupplier (com.facebook.presto.execution.TestSqlTaskManager.MockExchangeClientSupplier)2 SpoolingOutputBufferFactory (com.facebook.presto.execution.buffer.SpoolingOutputBufferFactory)2 QueryContext (com.facebook.presto.memory.QueryContext)2 ConnectorSplit (com.facebook.presto.spi.ConnectorSplit)2 PrestoException (com.facebook.presto.spi.PrestoException)2 TestingNodeManager (com.facebook.presto.testing.TestingNodeManager)2 URI (java.net.URI)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 BoundedExecutor (com.facebook.airlift.concurrent.BoundedExecutor)1 TestingGcMonitor (com.facebook.airlift.stats.TestingGcMonitor)1 Subfield (com.facebook.presto.common.Subfield)1