Use of io.trino.operator.GroupByHashPageIndexerFactory in project trino by trinodb.
In class AbstractTestHive, method setup.
/**
 * Builds the Hive connector fixtures shared by the tests for the given database.
 *
 * <p>After calling {@code setupHive(databaseName)}, the supplied metastore and HDFS environment are
 * stored in the corresponding fields, and the location service, metadata factory, transaction manager,
 * split manager, page sink provider, page source provider and node partitioning provider are created
 * on top of them, in that order.
 *
 * <p>The metadata factory is wired with two test hooks: a table-scan redirection that only fires for
 * tables whose name contains {@code "apply_redirection_tester"}, and a materialized view lookup that
 * only answers for view names containing {@code "materialized_view_tester"}.
 *
 * @param databaseName schema name handed to {@code setupHive} and used as the schema of the mock redirection target
 * @param hiveConfig configuration used for the partition manager, split manager, file writers and page sources
 * @param hiveMetastore metastore assigned to {@code metastoreClient}
 * @param hdfsConfiguration HDFS environment assigned to {@code hdfsEnvironment}
 */
protected final void setup(String databaseName, HiveConfig hiveConfig, HiveMetastore hiveMetastore, HdfsEnvironment hdfsConfiguration) {
    setupHive(databaseName);

    // Adopt the externally supplied collaborators as this test's fixtures.
    metastoreClient = hiveMetastore;
    hdfsEnvironment = hdfsConfiguration;

    HivePartitionManager hivePartitionManager = new HivePartitionManager(hiveConfig);
    locationService = new HiveLocationService(hdfsEnvironment);
    JsonCodec<PartitionUpdate> updateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);

    metadataFactory = new HiveMetadataFactory(
            new CatalogName("hive"),
            HiveMetastoreFactory.ofInstance(metastoreClient),
            hdfsEnvironment,
            hivePartitionManager,
            10,
            10,
            10,
            false,
            false,
            false,
            true,
            true,
            false,
            1000,
            Optional.empty(),
            true,
            TESTING_TYPE_MANAGER,
            NOOP_METADATA_PROVIDER,
            locationService,
            updateCodec,
            newFixedThreadPool(2),
            heartbeatService,
            TEST_SERVER_VERSION,
            // Redirect only the tables created by the redirection tests; everything else is left alone.
            (session, tableHandle) -> {
                if (tableHandle.getTableName().contains("apply_redirection_tester")) {
                    CatalogSchemaTableName redirectTarget = new CatalogSchemaTableName("hive", databaseName, "mock_redirection_target");
                    return Optional.of(new TableScanRedirectApplicationResult(redirectTarget, ImmutableMap.of(), TupleDomain.all()));
                }
                return Optional.empty();
            },
            ImmutableSet.of(
                    new PartitionsSystemTableProvider(hivePartitionManager, TESTING_TYPE_MANAGER),
                    new PropertiesSystemTableProvider()),
            // Materialized view metadata that reports a dummy definition for the tester views only.
            metastore -> new NoneHiveMaterializedViewMetadata() {
                @Override
                public Optional<ConnectorMaterializedViewDefinition> getMaterializedView(ConnectorSession session, SchemaTableName viewName) {
                    if (viewName.getTableName().contains("materialized_view_tester")) {
                        ConnectorMaterializedViewDefinition.Column onlyColumn =
                                new ConnectorMaterializedViewDefinition.Column("abc", TypeId.of("type"));
                        return Optional.of(new ConnectorMaterializedViewDefinition(
                                "dummy_view_sql",
                                Optional.empty(),
                                Optional.empty(),
                                Optional.empty(),
                                ImmutableList.of(onlyColumn),
                                Optional.empty(),
                                Optional.of("alice"),
                                ImmutableMap.of()));
                    }
                    return Optional.empty();
                }
            },
            SqlStandardAccessControlMetadata::new,
            NO_REDIRECTIONS,
            TableInvalidationCallback.NOOP);

    transactionManager = new HiveTransactionManager(metadataFactory);

    splitManager = new HiveSplitManager(
            transactionManager,
            hivePartitionManager,
            new NamenodeStats(),
            hdfsEnvironment,
            new CachingDirectoryLister(hiveConfig),
            directExecutor(),
            new CounterStat(),
            100,
            hiveConfig.getMaxOutstandingSplitsSize(),
            hiveConfig.getMinPartitionBatchSize(),
            hiveConfig.getMaxPartitionBatchSize(),
            hiveConfig.getMaxInitialSplits(),
            hiveConfig.getSplitLoaderConcurrency(),
            hiveConfig.getMaxSplitsPerSecond(),
            false,
            TESTING_TYPE_MANAGER);

    // NOTE: the sink provider is configured from getHiveConfig(), not from the hiveConfig parameter;
    // the writer factories and session properties still come from hiveConfig.
    pageSinkProvider = new HivePageSinkProvider(
            getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment),
            hdfsEnvironment,
            PAGE_SORTER,
            HiveMetastoreFactory.ofInstance(metastoreClient),
            new GroupByHashPageIndexerFactory(JOIN_COMPILER, BLOCK_TYPE_OPERATORS),
            TESTING_TYPE_MANAGER,
            getHiveConfig(),
            locationService,
            updateCodec,
            new TestingNodeManager("fake-environment"),
            new HiveEventClient(),
            getHiveSessionProperties(hiveConfig),
            new HiveWriterStats());

    pageSourceProvider = new HivePageSourceProvider(
            TESTING_TYPE_MANAGER,
            hdfsEnvironment,
            hiveConfig,
            getDefaultHivePageSourceFactories(hdfsEnvironment, hiveConfig),
            getDefaultHiveRecordCursorProviders(hiveConfig, hdfsEnvironment),
            new GenericHiveRecordCursorProvider(hdfsEnvironment, hiveConfig),
            Optional.empty());

    nodePartitioningProvider = new HiveNodePartitioningProvider(
            new TestingNodeManager("fake-environment"),
            TESTING_TYPE_MANAGER);
}
Use of io.trino.operator.GroupByHashPageIndexerFactory in project trino by trinodb.
In class TestHivePageSink, method createPageSink.
/**
 * Creates a Hive page sink that writes the test table directly into {@code outputPath}.
 *
 * <p>The output table handle uses {@code outputPath} as both paths of its location handle with the
 * {@code DIRECT_TO_TARGET_NEW_DIRECTORY} write mode, and takes both of its storage formats from
 * {@code config.getHiveStorageFormat()}.
 *
 * @param transaction transaction handle passed to the provider when the sink is created
 * @param config Hive configuration used for the writer factories, provider, session properties and session
 * @param metastore metastore queried for the test table and wrapped for the sink provider
 * @param outputPath directory used as the location of the output table
 * @param stats writer statistics object handed to the sink provider
 * @return the page sink created by a freshly built {@code HivePageSinkProvider}
 */
private static ConnectorPageSink createPageSink(HiveTransactionHandle transaction, HiveConfig config, HiveMetastore metastore, Path outputPath, HiveWriterStats stats) {
    LocationHandle targetLocation = new LocationHandle(outputPath, outputPath, false, DIRECT_TO_TARGET_NEW_DIRECTORY);
    SchemaTableName tableName = new SchemaTableName(SCHEMA_NAME, TABLE_NAME);

    HiveOutputTableHandle outputTable = new HiveOutputTableHandle(
            SCHEMA_NAME,
            TABLE_NAME,
            getColumnHandles(),
            new HivePageSinkMetadata(tableName, metastore.getTable(SCHEMA_NAME, TABLE_NAME), ImmutableMap.of()),
            targetLocation,
            config.getHiveStorageFormat(),
            config.getHiveStorageFormat(),
            ImmutableList.of(),
            Optional.empty(),
            "test",
            ImmutableMap.of(),
            NO_ACID_TRANSACTION,
            false,
            false);

    JsonCodec<PartitionUpdate> updateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);

    // The join compiler and the block operators share a single TypeOperators instance.
    TypeOperators sharedTypeOperators = new TypeOperators();
    BlockTypeOperators blockOperators = new BlockTypeOperators(sharedTypeOperators);

    HivePageSinkProvider sinkProvider = new HivePageSinkProvider(
            getDefaultHiveFileWriterFactories(config, HDFS_ENVIRONMENT),
            HDFS_ENVIRONMENT,
            PAGE_SORTER,
            HiveMetastoreFactory.ofInstance(metastore),
            new GroupByHashPageIndexerFactory(new JoinCompiler(sharedTypeOperators), blockOperators),
            TESTING_TYPE_MANAGER,
            config,
            new HiveLocationService(HDFS_ENVIRONMENT),
            updateCodec,
            new TestingNodeManager("fake-environment"),
            new HiveEventClient(),
            getHiveSessionProperties(config),
            stats);

    return sinkProvider.createPageSink(transaction, getHiveSession(config), outputTable);
}
Use of io.trino.operator.GroupByHashPageIndexerFactory in project trino by trinodb.
In class TestDeltaLakePageSink, method createPageSink.
/**
 * Creates a Delta Lake page sink for the test table located at {@code outputPath}.
 *
 * <p>A default {@code DeltaLakeConfig} supplies both the provider configuration and the checkpoint
 * writing interval stored in the output table handle. The sink is created under a new
 * {@code HiveTransactionHandle(false)} and the shared {@code SESSION}.
 *
 * @param outputPath location of the output table; its string form is stored in the table handle
 * @param stats writer statistics object handed to the sink provider
 * @return the page sink created by a freshly built {@code DeltaLakePageSinkProvider}
 */
private static ConnectorPageSink createPageSink(Path outputPath, DeltaLakeWriterStats stats) {
    DeltaLakeConfig config = new DeltaLakeConfig();

    DeltaLakeOutputTableHandle outputTable = new DeltaLakeOutputTableHandle(
            SCHEMA_NAME,
            TABLE_NAME,
            getColumnHandles(),
            outputPath.toString(),
            Optional.of(config.getDefaultCheckpointWritingInterval()),
            true);

    // The join compiler gets its own TypeOperators; BlockTypeOperators is built with its no-arg constructor.
    GroupByHashPageIndexerFactory pageIndexerFactory = new GroupByHashPageIndexerFactory(
            new JoinCompiler(new TypeOperators()),
            new BlockTypeOperators());
    JsonCodec<DataFileInfo> dataFileInfoCodec = JsonCodec.jsonCodec(DataFileInfo.class);

    DeltaLakePageSinkProvider sinkProvider = new DeltaLakePageSinkProvider(
            pageIndexerFactory,
            HDFS_ENVIRONMENT,
            dataFileInfoCodec,
            stats,
            config,
            new TestingTypeManager(),
            new NodeVersion("test-version"));

    return sinkProvider.createPageSink(new HiveTransactionHandle(false), SESSION, outputTable);
}
Use of io.trino.operator.GroupByHashPageIndexerFactory in project trino by trinodb.
In class AbstractTestHiveFileSystem, method setup.
/**
 * Builds the Hive connector fixtures for file-system tests against a Thrift metastore.
 *
 * <p>Records the database and the names of the test tables, creates a {@code HiveConfig} with the
 * requested S3 Select pushdown setting, connects a testing metastore to {@code host:port}
 * (through the SOCKS proxy named by the {@code hive.metastore.thrift.client.socks-proxy} system
 * property, when that property is set), and then creates the location service, metadata factory,
 * transaction manager, split manager, page sink provider and page source provider.
 * {@code onSetupComplete()} is invoked as the last step.
 *
 * @param host metastore host passed to the testing metastore locator
 * @param port metastore port passed to the testing metastore locator
 * @param databaseName schema that holds the test tables
 * @param s3SelectPushdownEnabled value passed to {@code HiveConfig.setS3SelectPushdownEnabled}
 * @param hdfsConfiguration HDFS configuration used to build the {@code HdfsEnvironment}
 */
protected void setup(String host, int port, String databaseName, boolean s3SelectPushdownEnabled, HdfsConfiguration hdfsConfiguration) {
    database = databaseName;
    table = new SchemaTableName(database, "trino_test_external_fs");
    tableWithHeader = new SchemaTableName(database, "trino_test_external_fs_with_header");
    tableWithHeaderAndFooter = new SchemaTableName(database, "trino_test_external_fs_with_header_and_footer");

    // Lower-cased UUID without dashes, appended to the temporary table name.
    String uniqueSuffix = randomUUID().toString().toLowerCase(ENGLISH).replace("-", "");
    temporaryCreateTable = new SchemaTableName(database, "tmp_trino_test_create_" + uniqueSuffix);

    config = new HiveConfig().setS3SelectPushdownEnabled(s3SelectPushdownEnabled);

    String socksProxyProperty = System.getProperty("hive.metastore.thrift.client.socks-proxy");
    Optional<HostAndPort> socksProxy = Optional.ofNullable(socksProxyProperty).map(HostAndPort::fromString);
    MetastoreLocator metastoreLocator = new TestingMetastoreLocator(socksProxy, HostAndPort.fromParts(host, port));

    HivePartitionManager partitionManager = new HivePartitionManager(config);
    hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, new HdfsConfig(), new NoHdfsAuthentication());
    MetastoreConfig metastoreConfig = new MetastoreConfig();

    // The Thrift metastore is given a fresh default HiveConfig rather than `config`.
    ThriftHiveMetastore thriftMetastore = new ThriftHiveMetastore(
            metastoreLocator,
            new HiveConfig(),
            metastoreConfig,
            new ThriftMetastoreConfig(),
            hdfsEnvironment,
            false);
    HiveIdentity sessionIdentity = new HiveIdentity(getHiveSession(config).getIdentity());
    metastoreClient = new TestingHiveMetastore(
            new BridgingHiveMetastore(thriftMetastore, sessionIdentity),
            getBasePath(),
            hdfsEnvironment);

    locationService = new HiveLocationService(hdfsEnvironment);
    JsonCodec<PartitionUpdate> updateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);

    metadataFactory = new HiveMetadataFactory(
            new CatalogName("hive"),
            config,
            metastoreConfig,
            HiveMetastoreFactory.ofInstance(metastoreClient),
            hdfsEnvironment,
            partitionManager,
            newDirectExecutorService(),
            heartbeatService,
            TESTING_TYPE_MANAGER,
            NOOP_METADATA_PROVIDER,
            locationService,
            updateCodec,
            new NodeVersion("test_version"),
            new NoneHiveRedirectionsProvider(),
            ImmutableSet.of(
                    new PartitionsSystemTableProvider(partitionManager, TESTING_TYPE_MANAGER),
                    new PropertiesSystemTableProvider()),
            new DefaultHiveMaterializedViewMetadataFactory(),
            SqlStandardAccessControlMetadata::new,
            NO_REDIRECTIONS,
            TableInvalidationCallback.NOOP);

    transactionManager = new HiveTransactionManager(metadataFactory);

    // The directory lister is also built from a fresh default HiveConfig rather than `config`.
    splitManager = new HiveSplitManager(
            transactionManager,
            partitionManager,
            new NamenodeStats(),
            hdfsEnvironment,
            new CachingDirectoryLister(new HiveConfig()),
            new BoundedExecutor(executor, config.getMaxSplitIteratorThreads()),
            new CounterStat(),
            config.getMaxOutstandingSplits(),
            config.getMaxOutstandingSplitsSize(),
            config.getMinPartitionBatchSize(),
            config.getMaxPartitionBatchSize(),
            config.getMaxInitialSplits(),
            config.getSplitLoaderConcurrency(),
            config.getMaxSplitsPerSecond(),
            config.getRecursiveDirWalkerEnabled(),
            TESTING_TYPE_MANAGER);

    // The join compiler and the block operators share a single TypeOperators instance.
    TypeOperators sharedTypeOperators = new TypeOperators();
    BlockTypeOperators blockOperators = new BlockTypeOperators(sharedTypeOperators);

    pageSinkProvider = new HivePageSinkProvider(
            getDefaultHiveFileWriterFactories(config, hdfsEnvironment),
            hdfsEnvironment,
            PAGE_SORTER,
            HiveMetastoreFactory.ofInstance(metastoreClient),
            new GroupByHashPageIndexerFactory(new JoinCompiler(sharedTypeOperators), blockOperators),
            TESTING_TYPE_MANAGER,
            config,
            locationService,
            updateCodec,
            new TestingNodeManager("fake-environment"),
            new HiveEventClient(),
            getHiveSessionProperties(config),
            new HiveWriterStats());

    pageSourceProvider = new HivePageSourceProvider(
            TESTING_TYPE_MANAGER,
            hdfsEnvironment,
            config,
            getDefaultHivePageSourceFactories(hdfsEnvironment, config),
            getDefaultHiveRecordCursorProviders(config, hdfsEnvironment),
            new GenericHiveRecordCursorProvider(hdfsEnvironment, config),
            Optional.empty());

    onSetupComplete();
}
Aggregations