Use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.
From the class TestHiveSplitSource, the method testPreloadSplitsForRewindableSplitSource:
@Test
public void testPreloadSplitsForRewindableSplitSource() throws Exception {
    HiveSplitSource hiveSplitSource = HiveSplitSource.bucketedRewindable(SESSION, "database", "table", new CacheQuotaRequirement(GLOBAL, Optional.empty()), 10, new DataSize(1, MEGABYTE), new TestingHiveSplitLoader(), EXECUTOR, new CounterStat());
    for (int i = 0; i < 10; i++) {
        hiveSplitSource.addToQueue(new TestSplit(i, OptionalInt.of(0)));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), i + 1);
    }
    SettableFuture<List<ConnectorSplit>> splits = SettableFuture.create();
    // create a thread that will get the splits
    CountDownLatch started = new CountDownLatch(1);
    Thread getterThread = new Thread(() -> {
        try {
            started.countDown();
            List<ConnectorSplit> batch = getSplits(hiveSplitSource, OptionalInt.of(0), 10);
            splits.set(batch);
        }
        catch (Throwable e) {
            splits.setException(e);
        }
    });
    getterThread.start();
    try {
        // wait for the getter thread to start
        assertTrue(started.await(10, SECONDS));
        // scheduling will not start before noMoreSplits is called, to ensure we preload all splits
        MILLISECONDS.sleep(200);
        assertFalse(splits.isDone());
        // wait for the thread to get the splits after the noMoreSplits signal is sent
        hiveSplitSource.noMoreSplits();
        List<ConnectorSplit> connectorSplits = splits.get(10, SECONDS);
        assertEquals(connectorSplits.size(), 0);
        assertFalse(hiveSplitSource.isFinished());
        connectorSplits = getSplits(hiveSplitSource, OptionalInt.of(0), 10);
        for (int i = 0; i < 10; i++) {
            assertEquals(((HiveSplit) connectorSplits.get(i)).getPartitionDataColumnCount(), i);
        }
        assertTrue(hiveSplitSource.isFinished());
    }
    finally {
        getterThread.interrupt();
    }
}
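The test drains splits through a getSplits helper defined elsewhere in TestHiveSplitSource. A minimal sketch of what such a helper could look like, assuming the standard ConnectorSplitSource.getNextBatch contract and Airlift's MoreFutures.getFutureValue; the body below is an assumption for illustration, not the verbatim test code:

// Hypothetical sketch of the getSplits helper used by the tests above.
// Assumes ConnectorSplitSource.getNextBatch(ConnectorPartitionHandle, int)
// and getFutureValue from com.facebook.airlift.concurrent.MoreFutures.
private static List<ConnectorSplit> getSplits(ConnectorSplitSource source, OptionalInt bucketNumber, int maxSize) {
    if (bucketNumber.isPresent()) {
        // bucketed sources are drained one partition handle (bucket) at a time
        return getFutureValue(source.getNextBatch(new HivePartitionHandle(bucketNumber.getAsInt()), maxSize)).getSplits();
    }
    // unbucketed sources use the NOT_PARTITIONED handle
    return getFutureValue(source.getNextBatch(NOT_PARTITIONED, maxSize)).getSplits();
}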
Use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.
From the class HiveSplitManager, the method getSplits:
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableLayoutHandle layoutHandle, SplitSchedulingContext splitSchedulingContext) {
    HiveTableLayoutHandle layout = (HiveTableLayoutHandle) layoutHandle;
    SchemaTableName tableName = layout.getSchemaTableName();
    // get table metadata
    TransactionalMetadata metadata = hiveTransactionManager.get(transaction);
    if (metadata == null) {
        throw new PrestoException(HIVE_TRANSACTION_NOT_FOUND, format("Transaction not found: %s", transaction));
    }
    SemiTransactionalHiveMetastore metastore = metadata.getMetastore();
    Table table = metastore.getTable(
            new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), metastore.getColumnConverterProvider()),
            tableName.getSchemaName(),
            tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    if (!isOfflineDataDebugModeEnabled(session)) {
        // verify table is not marked as non-readable
        String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
        if (!isNullOrEmpty(tableNotReadable)) {
            throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
        }
    }
    // get partitions
    List<HivePartition> partitions = layout.getPartitions().orElseThrow(() -> new PrestoException(GENERIC_INTERNAL_ERROR, "Layout does not contain partitions"));
    // short circuit if we don't have any partitions
    HivePartition partition = Iterables.getFirst(partitions, null);
    if (partition == null) {
        return new FixedSplitSource(ImmutableList.of());
    }
    Optional<HiveBucketFilter> bucketFilter = layout.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = layout.getBucketHandle();
    if ((splitSchedulingContext.getSplitSchedulingStrategy() == GROUPED_SCHEDULING) && !bucketHandle.isPresent()) {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    if (bucketHandle.isPresent()) {
        if (bucketHandle.get().getReadBucketCount() > bucketHandle.get().getTableBucketCount()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("readBucketCount (%s) is greater than the tableBucketCount (%s), which generally points to an issue in plan generation", bucketHandle.get().getReadBucketCount(), bucketHandle.get().getTableBucketCount()));
        }
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(metastore, table, tableName, partitions, bucketHandle, session, splitSchedulingContext.getWarningCollector(), layout.getRequestedColumns(), layout.getPredicateColumns(), layout.getDomainPredicate().getDomains());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hivePartitions,
            getPathDomain(layout.getDomainPredicate(), layout.getPredicateColumns()),
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            // avoid over-committing split loader concurrency
            min(splitLoaderConcurrency, partitions.size()),
            recursiveDfsWalkerEnabled,
            splitSchedulingContext.schedulerUsesHostAddresses(),
            layout.isPartialAggregationsPushedDown());
    HiveSplitSource splitSource;
    CacheQuotaRequirement cacheQuotaRequirement = cacheQuotaRequirementProvider.getCacheQuotaRequirement(table.getDatabaseName(), table.getTableName());
    switch (splitSchedulingContext.getSplitSchedulingStrategy()) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplits, maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplits, maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        case REWINDABLE_GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketedRewindable(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingContext.getSplitSchedulingStrategy());
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
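Each branch of the switch above hands the split source a fresh CounterStat. The CounterStat API itself is small; a self-contained sketch of the two calls Presto code typically makes, update(long) and getTotalCount() (the counter's role and name here are illustrative):

import com.facebook.airlift.stats.CounterStat;

public class CounterStatExample {
    public static void main(String[] args) {
        CounterStat splitsLoaded = new CounterStat();
        // record events as they happen; update adds to the running total
        splitsLoaded.update(1);
        splitsLoaded.update(4);
        // the total is a monotonically increasing sum of all updates
        System.out.println(splitsLoaded.getTotalCount()); // prints 5
    }
}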
Use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.
From the class TestMemoryRevokingScheduler, the method newSqlTask:
private SqlTask newSqlTask(QueryId queryId, MemoryPool memoryPool) {
    QueryContext queryContext = getOrCreateQueryContext(queryId, memoryPool);
    TaskId taskId = new TaskId(queryId.getId(), 0, 0, idGeneator.incrementAndGet());
    URI location = URI.create("fake://task/" + taskId);
    return createSqlTask(taskId, location, "fake", queryContext, sqlTaskExecutionFactory, new MockExchangeClientSupplier(), singleThreadedExecutor, Functions.identity(), new DataSize(32, MEGABYTE), new CounterStat(), new SpoolingOutputBufferFactory(new FeaturesConfig()));
}
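The CounterStat passed to createSqlTask is a throwaway here, since the test never asserts on it. When per-task or per-worker counters do need to be combined, CounterStat.merge folds one counter's counts into another; a brief sketch (the helper name is illustrative, not Presto code):

// Hypothetical helper: combine several CounterStat instances into one,
// using the Airlift merge(CounterStat) method.
private static CounterStat combine(List<CounterStat> counters) {
    CounterStat total = new CounterStat();
    counters.forEach(total::merge);
    return total;
}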
Use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.
From the class TestHiveSplitManager, the method assertRedundantColumnDomains:
private void assertRedundantColumnDomains(Range predicateRange, PartitionStatistics partitionStatistics, List<Set<ColumnHandle>> expectedRedundantColumnDomains, HiveColumnHandle columnHandle) throws Exception {
    // Prepare query predicate tuple domain
    TupleDomain<ColumnHandle> queryTupleDomain = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(
            new ColumnDomain<>(columnHandle, Domain.create(SortedRangeSet.copyOf(predicateRange.getType(), ImmutableList.of(predicateRange)), false)))));
    // Prepare partition with stats
    PartitionWithStatistics partitionWithStatistics = new PartitionWithStatistics(
            new Partition("test_db", "test_table", ImmutableList.of(PARTITION_VALUE), new Storage(fromHiveStorageFormat(ORC), "location", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of()), COLUMNS, ImmutableMap.of(), Optional.empty(), false, true, 0),
            PARTITION_NAME,
            partitionStatistics);
    HiveClientConfig hiveClientConfig = new HiveClientConfig().setPartitionStatisticsBasedOptimizationEnabled(true);
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveClientConfig, new MetastoreClientConfig()), ImmutableSet.of()), new MetastoreClientConfig(), new NoHdfsAuthentication());
    HiveMetadataFactory metadataFactory = new HiveMetadataFactory(
            new TestingExtendedHiveMetastore(TEST_TABLE, partitionWithStatistics), hdfsEnvironment, new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, hiveClientConfig), DateTimeZone.forOffsetHours(1),
            true, false, false, false, true, true,
            hiveClientConfig.getMaxPartitionBatchSize(), hiveClientConfig.getMaxPartitionsPerScan(), false, FUNCTION_AND_TYPE_MANAGER,
            new HiveLocationService(hdfsEnvironment), FUNCTION_RESOLUTION, ROW_EXPRESSION_SERVICE, FILTER_STATS_CALCULATOR_SERVICE,
            new TableParameterCodec(), HiveTestUtils.PARTITION_UPDATE_CODEC, HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC, executor,
            new HiveTypeTranslator(), new HiveStagingFileCommitter(hdfsEnvironment, executor), new HiveZeroRowFileCreator(hdfsEnvironment, new OutputStreamDataSinkFactory(), executor), TEST_SERVER_VERSION,
            new HivePartitionObjectBuilder(), new HiveEncryptionInformationProvider(ImmutableList.of()), new HivePartitionStats(), new HiveFileRenamer(), HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    HiveSplitManager splitManager = new HiveSplitManager(
            new TestingHiveTransactionManager(metadataFactory), new NamenodeStats(), hdfsEnvironment, new TestingDirectoryLister(), directExecutor(), new HiveCoercionPolicy(FUNCTION_AND_TYPE_MANAGER),
            new CounterStat(), 100, hiveClientConfig.getMaxOutstandingSplitsSize(), hiveClientConfig.getMinPartitionBatchSize(), hiveClientConfig.getMaxPartitionBatchSize(), hiveClientConfig.getSplitLoaderConcurrency(),
            false, new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()), new HiveEncryptionInformationProvider(ImmutableList.of()));
    HiveColumnHandle partitionColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(VARCHAR), MAX_PARTITION_KEY_COLUMN_INDEX, PARTITION_KEY, Optional.empty(), Optional.empty());
    List<HivePartition> partitions = ImmutableList.of(new HivePartition(new SchemaTableName("test_schema", "test_table"), PARTITION_NAME, ImmutableMap.of(partitionColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice(PARTITION_VALUE)))));
    TupleDomain<Subfield> domainPredicate = queryTupleDomain.transform(HiveColumnHandle.class::cast).transform(column -> new Subfield(column.getName(), ImmutableList.of()));
    ConnectorSplitSource splitSource = splitManager.getSplits(
            new HiveTransactionHandle(),
            new TestingConnectorSession(new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties()),
            new HiveTableLayoutHandle(new SchemaTableName("test_schema", "test_table"), "test_path", ImmutableList.of(partitionColumn), COLUMNS, ImmutableMap.of(), partitions, domainPredicate, TRUE_CONSTANT, ImmutableMap.of(partitionColumn.getName(), partitionColumn, columnHandle.getName(), columnHandle), queryTupleDomain, Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false),
            SPLIT_SCHEDULING_CONTEXT);
    List<Set<ColumnHandle>> actualRedundantColumnDomains = splitSource.getNextBatch(NOT_PARTITIONED, 100).get().getSplits().stream().map(HiveSplit.class::cast).map(HiveSplit::getRedundantColumnDomains).collect(toImmutableList());
    assertEquals(actualRedundantColumnDomains, expectedRedundantColumnDomains);
}
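The test pulls a single batch of up to 100 splits with getNextBatch(NOT_PARTITIONED, 100). A source is only guaranteed to be exhausted once isFinished() returns true, so code that needs every split usually loops; a minimal sketch under that assumption (the helper name is illustrative):

// Hypothetical helper: drain a ConnectorSplitSource batch by batch
// until it reports that no more splits will be produced.
private static List<ConnectorSplit> drainSplits(ConnectorSplitSource source) throws Exception {
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (!source.isFinished()) {
        splits.addAll(source.getNextBatch(NOT_PARTITIONED, 100).get().getSplits());
    }
    return splits.build();
}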
Use of com.facebook.airlift.stats.CounterStat in project presto by prestodb.
From the class TestHiveSplitSource, the method testRewindOneBucket:
@Test
public void testRewindOneBucket() {
    HiveSplitSource hiveSplitSource = HiveSplitSource.bucketedRewindable(SESSION, "database", "table", new CacheQuotaRequirement(GLOBAL, Optional.empty()), 10, new DataSize(1, MEGABYTE), new TestingHiveSplitLoader(), EXECUTOR, new CounterStat());
    for (int i = 0; i < 10; i++) {
        hiveSplitSource.addToQueue(new TestSplit(i, OptionalInt.of(0)));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), i + 1);
    }
    hiveSplitSource.noMoreSplits();
    // Rewind when no split has been retrieved.
    hiveSplitSource.rewind(new HivePartitionHandle(0));
    assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), 10);
    // Rewind when the splits have been partially retrieved.
    assertEquals(getSplits(hiveSplitSource, OptionalInt.of(0), 5).size(), 5);
    assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), 5);
    hiveSplitSource.rewind(new HivePartitionHandle(0));
    assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), 10);
    // Rewind when the splits have been fully retrieved.
    assertEquals(getSplits(hiveSplitSource, OptionalInt.of(0), 10).size(), 10);
    assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), 0);
    hiveSplitSource.rewind(new HivePartitionHandle(0));
    assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), 10);
}
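All of the snippets above thread a CounterStat through a constructor so the count can later be exported over JMX. The usual Airlift pattern is to expose the counter from a stats holder with the @Managed and @Nested annotations from org.weakref.jmx; a hedged sketch (the holder class and its rewind counter are illustrative, not Presto code):

import com.facebook.airlift.stats.CounterStat;
import org.weakref.jmx.Managed;
import org.weakref.jmx.Nested;

// Illustrative stats holder: exporting a CounterStat through JMX is the
// typical reason Presto passes one into constructors like those above.
public class SplitSourceStats {
    private final CounterStat rewinds = new CounterStat();

    public void recordRewind() {
        rewinds.update(1);
    }

    @Managed
    @Nested
    public CounterStat getRewinds() {
        return rewinds;
    }
}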