Use of io.trino.plugin.base.CatalogName in project trino by trinodb.
The class AbstractTestHive, method setup.
protected final void setup(String databaseName, HiveConfig hiveConfig, HiveMetastore hiveMetastore, HdfsEnvironment hdfsConfiguration) {
setupHive(databaseName);
metastoreClient = hiveMetastore;
hdfsEnvironment = hdfsConfiguration;
HivePartitionManager partitionManager = new HivePartitionManager(hiveConfig);
locationService = new HiveLocationService(hdfsEnvironment);
JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
metadataFactory = new HiveMetadataFactory(
        new CatalogName("hive"), HiveMetastoreFactory.ofInstance(metastoreClient), hdfsEnvironment, partitionManager,
        10, 10, 10, false, false, false, true, true, false, 1000, Optional.empty(), true,
        TESTING_TYPE_MANAGER, NOOP_METADATA_PROVIDER, locationService, partitionUpdateCodec,
        newFixedThreadPool(2), heartbeatService, TEST_SERVER_VERSION,
        (session, tableHandle) -> {
            if (!tableHandle.getTableName().contains("apply_redirection_tester")) {
                return Optional.empty();
            }
            return Optional.of(new TableScanRedirectApplicationResult(new CatalogSchemaTableName("hive", databaseName, "mock_redirection_target"), ImmutableMap.of(), TupleDomain.all()));
        },
        ImmutableSet.of(new PartitionsSystemTableProvider(partitionManager, TESTING_TYPE_MANAGER), new PropertiesSystemTableProvider()),
        metastore -> new NoneHiveMaterializedViewMetadata() {
            @Override
            public Optional<ConnectorMaterializedViewDefinition> getMaterializedView(ConnectorSession session, SchemaTableName viewName) {
                if (!viewName.getTableName().contains("materialized_view_tester")) {
                    return Optional.empty();
                }
                return Optional.of(new ConnectorMaterializedViewDefinition("dummy_view_sql", Optional.empty(), Optional.empty(), Optional.empty(), ImmutableList.of(new ConnectorMaterializedViewDefinition.Column("abc", TypeId.of("type"))), Optional.empty(), Optional.of("alice"), ImmutableMap.of()));
            }
        },
        SqlStandardAccessControlMetadata::new, NO_REDIRECTIONS, TableInvalidationCallback.NOOP);
transactionManager = new HiveTransactionManager(metadataFactory);
splitManager = new HiveSplitManager(
        transactionManager, partitionManager, new NamenodeStats(), hdfsEnvironment, new CachingDirectoryLister(hiveConfig), directExecutor(), new CounterStat(), 100,
        hiveConfig.getMaxOutstandingSplitsSize(), hiveConfig.getMinPartitionBatchSize(), hiveConfig.getMaxPartitionBatchSize(), hiveConfig.getMaxInitialSplits(),
        hiveConfig.getSplitLoaderConcurrency(), hiveConfig.getMaxSplitsPerSecond(), false, TESTING_TYPE_MANAGER);
pageSinkProvider = new HivePageSinkProvider(
        getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment), hdfsEnvironment, PAGE_SORTER, HiveMetastoreFactory.ofInstance(metastoreClient),
        new GroupByHashPageIndexerFactory(JOIN_COMPILER, BLOCK_TYPE_OPERATORS), TESTING_TYPE_MANAGER, getHiveConfig(), locationService, partitionUpdateCodec,
        new TestingNodeManager("fake-environment"), new HiveEventClient(), getHiveSessionProperties(hiveConfig), new HiveWriterStats());
pageSourceProvider = new HivePageSourceProvider(
        TESTING_TYPE_MANAGER, hdfsEnvironment, hiveConfig, getDefaultHivePageSourceFactories(hdfsEnvironment, hiveConfig),
        getDefaultHiveRecordCursorProviders(hiveConfig, hdfsEnvironment), new GenericHiveRecordCursorProvider(hdfsEnvironment, hiveConfig), Optional.empty());
nodePartitioningProvider = new HiveNodePartitioningProvider(new TestingNodeManager("fake-environment"), TESTING_TYPE_MANAGER);
}
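In this test setup, CatalogName is just a value object that pins the catalog identity ("hive") onto HiveMetadataFactory and, indirectly, onto redirection targets such as the CatalogSchemaTableName above. For orientation, the sketch below is a hypothetical stand-in for such a wrapper, not the real io.trino.plugin.base.CatalogName; the class and method names are assumptions, but the shape (non-null name, value equality, name-as-toString) is what callers like the code above rely on.

import java.util.Objects;

import static java.util.Objects.requireNonNull;

// Illustrative stand-in for a catalog-name value object (hypothetical, not the Trino class)
public final class ExampleCatalogName
{
    private final String name;

    public ExampleCatalogName(String name)
    {
        this.name = requireNonNull(name, "name is null");
    }

    @Override
    public boolean equals(Object other)
    {
        return other instanceof ExampleCatalogName && ((ExampleCatalogName) other).name.equals(name);
    }

    @Override
    public int hashCode()
    {
        return Objects.hash(name);
    }

    @Override
    public String toString()
    {
        // returning the bare name keeps log lines and error messages readable
        return name;
    }
}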
Use of io.trino.plugin.base.CatalogName in project trino by trinodb.
The class VacuumProcedure, method doVacuum.
private void doVacuum(ConnectorSession session, String schema, String table, String retention) throws IOException {
checkProcedureArgument(schema != null, "schema_name cannot be null");
checkProcedureArgument(!schema.isEmpty(), "schema_name cannot be empty");
checkProcedureArgument(table != null, "table_name cannot be null");
checkProcedureArgument(!table.isEmpty(), "table_name cannot be empty");
checkProcedureArgument(retention != null, "retention cannot be null");
Duration retentionDuration = Duration.valueOf(retention);
Duration minRetention = getVacuumMinRetention(session);
checkProcedureArgument(
        retentionDuration.compareTo(minRetention) >= 0,
        "Retention specified (%s) is shorter than the minimum retention configured in the system (%s). " +
                "Minimum retention can be changed with %s configuration property or %s.%s session property",
        retentionDuration, minRetention,
        DeltaLakeConfig.VACUUM_MIN_RETENTION, catalogName, DeltaLakeSessionProperties.VACUUM_MIN_RETENTION);
Instant threshold = Instant.now().minusMillis(retentionDuration.toMillis());
DeltaLakeMetadata metadata = metadataFactory.create(session.getIdentity());
SchemaTableName tableName = new SchemaTableName(schema, table);
DeltaLakeTableHandle handle = metadata.getTableHandle(session, tableName);
checkProcedureArgument(handle != null, "Table '%s' does not exist", tableName);
TableSnapshot tableSnapshot = transactionLogAccess.loadSnapshot(tableName, new Path(handle.getLocation()), session);
Path tableLocation = tableSnapshot.getTableLocation();
Path transactionLogDir = getTransactionLogDir(tableLocation);
FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session), tableLocation);
String commonPathPrefix = tableLocation + "/";
String queryId = session.getQueryId();
// Retain all active files and every file removed by a "recent" transaction (except for the oldest "recent").
// Any remaining files are not live and are not needed to read any "recent" snapshot.
List<Long> recentVersions = transactionLogAccess.getPastTableVersions(fileSystem, transactionLogDir, threshold, tableSnapshot.getVersion());
Set<String> retainedPaths = Stream.concat(
        transactionLogAccess.getActiveFiles(tableSnapshot, session).stream().map(AddFileEntry::getPath),
        transactionLogAccess.getJsonEntries(
                fileSystem,
                transactionLogDir,
                // skipping the oldest "recent" version: files removed by the remaining versions are no longer
                // active files, but are still needed to read a "recent" snapshot
                recentVersions.stream().sorted(naturalOrder()).skip(1).collect(toImmutableList()))
                .map(DeltaLakeTransactionLogEntry::getRemove).filter(Objects::nonNull).map(RemoveFileEntry::getPath))
        .peek(path -> checkState(!path.startsWith(tableLocation.toString()), "Unexpected absolute path in transaction log: %s", path))
        .collect(toImmutableSet());
log.debug("[%s] attempting to vacuum table %s [%s] with %s retention (expiry threshold %s). %s data file paths marked for retention", queryId, tableName, tableLocation, retention, threshold, retainedPaths.size());
long nonFiles = 0;
long allPathsChecked = 0;
long transactionLogFiles = 0;
long retainedKnownFiles = 0;
long retainedUnknownFiles = 0;
long removedFiles = 0;
RemoteIterator<LocatedFileStatus> listing = fileSystem.listFiles(tableLocation, true);
while (listing.hasNext()) {
    LocatedFileStatus fileStatus = listing.next();
    Path path = fileStatus.getPath();
    checkState(path.toString().startsWith(commonPathPrefix), "Unexpected path [%s] returned when listing files under [%s]", path, tableLocation);
    String relativePath = path.toString().substring(commonPathPrefix.length());
    if (relativePath.isEmpty()) {
        // A file entry returned for "tableLocation/" itself; this can happen on S3.
        continue;
    }
    allPathsChecked++;
    // TODO Note: Databricks can delete directories during vacuum on S3. This might need to be revisited.
    if (!fileStatus.isFile()) {
        nonFiles++;
        continue;
    }
    // ignore tableLocation/_delta_log/**
    if (relativePath.equals(TRANSACTION_LOG_DIRECTORY) || relativePath.startsWith(TRANSACTION_LOG_DIRECTORY + "/")) {
        log.debug("[%s] skipping a file inside transaction log dir: %s", queryId, path);
        transactionLogFiles++;
        continue;
    }
    // skip retained files
    if (retainedPaths.contains(relativePath)) {
        log.debug("[%s] retaining a known file: %s", queryId, path);
        retainedKnownFiles++;
        continue;
    }
    // ignore recently created files
    long modificationTime = fileStatus.getModificationTime();
    Instant modificationInstant = Instant.ofEpochMilli(modificationTime);
    if (!modificationInstant.isBefore(threshold)) {
        log.debug("[%s] retaining an unknown file %s with modification time %s (%s)", queryId, path, modificationTime, modificationInstant);
        retainedUnknownFiles++;
        continue;
    }
    log.debug("[%s] deleting file [%s] with modification time %s (%s)", queryId, path, modificationTime, modificationInstant);
    if (!fileSystem.delete(path, false)) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to delete file: " + path);
    }
    removedFiles++;
}
log.info("[%s] finished vacuuming table %s [%s]: files checked: %s; non-files: %s; metadata files: %s; retained known files: %s; retained unknown files: %s; removed files: %s", queryId, tableName, tableLocation, allPathsChecked, nonFiles, transactionLogFiles, retainedKnownFiles, retainedUnknownFiles, removedFiles);
}
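The retention handling at the top of doVacuum is the part worth internalizing: the retention string is parsed with io.airlift.units.Duration, rejected if it is below the configured minimum, and only then converted into the cut-off Instant compared against file modification times. A minimal, self-contained sketch of that arithmetic follows; the 7-day minimum is an assumed value standing in for the config/session lookup done by the real procedure.

import io.airlift.units.Duration;

import java.time.Instant;

public final class VacuumRetentionExample
{
    private VacuumRetentionExample() {}

    public static void main(String[] args)
    {
        // Assumed minimum; the real procedure reads it from DeltaLakeConfig or the session property.
        Duration minRetention = Duration.valueOf("7d");
        Duration retention = Duration.valueOf("30d");

        if (retention.compareTo(minRetention) < 0) {
            throw new IllegalArgumentException("Retention " + retention + " is shorter than the minimum " + minRetention);
        }

        // Files modified at or after this instant are never deleted, even if unknown to the transaction log.
        Instant threshold = Instant.now().minusMillis(retention.toMillis());
        System.out.println("Vacuum threshold: " + threshold);
    }
}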
Use of io.trino.plugin.base.CatalogName in project trino by trinodb.
The class JdbcModule, method setup.
@Override
public void setup(Binder binder) {
binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName));
install(new JdbcDiagnosticModule());
install(new IdentifierMappingModule());
newOptionalBinder(binder, ConnectorAccessControl.class);
newOptionalBinder(binder, QueryBuilder.class).setDefault().to(DefaultQueryBuilder.class).in(Scopes.SINGLETON);
procedureBinder(binder);
tablePropertiesProviderBinder(binder);
newOptionalBinder(binder, JdbcMetadataFactory.class).setDefault().to(DefaultJdbcMetadataFactory.class).in(Scopes.SINGLETON);
newOptionalBinder(binder, ConnectorSplitManager.class).setDefault().to(JdbcSplitManager.class).in(Scopes.SINGLETON);
newOptionalBinder(binder, ConnectorRecordSetProvider.class).setDefault().to(JdbcRecordSetProvider.class).in(Scopes.SINGLETON);
newOptionalBinder(binder, ConnectorPageSinkProvider.class).setDefault().to(JdbcPageSinkProvider.class).in(Scopes.SINGLETON);
binder.bind(JdbcConnector.class).in(Scopes.SINGLETON);
configBinder(binder).bindConfig(JdbcMetadataConfig.class);
configBinder(binder).bindConfig(JdbcWriteConfig.class);
configBinder(binder).bindConfig(BaseJdbcConfig.class);
configBinder(binder).bindConfig(TypeHandlingJdbcConfig.class);
bindSessionPropertiesProvider(binder, TypeHandlingJdbcSessionProperties.class);
bindSessionPropertiesProvider(binder, JdbcMetadataSessionProperties.class);
bindSessionPropertiesProvider(binder, JdbcWriteSessionProperties.class);
binder.bind(CachingJdbcClient.class).in(Scopes.SINGLETON);
binder.bind(JdbcClient.class).to(Key.get(CachingJdbcClient.class)).in(Scopes.SINGLETON);
newSetBinder(binder, Procedure.class).addBinding().toProvider(FlushJdbcMetadataCacheProcedure.class).in(Scopes.SINGLETON);
binder.bind(ConnectionFactory.class).annotatedWith(ForLazyConnectionFactory.class).to(Key.get(ConnectionFactory.class, StatsCollecting.class)).in(Scopes.SINGLETON);
binder.bind(ConnectionFactory.class).to(LazyConnectionFactory.class).in(Scopes.SINGLETON);
newOptionalBinder(binder, Key.get(int.class, MaxDomainCompactionThreshold.class));
newOptionalBinder(binder, Key.get(ExecutorService.class, ForRecordCursor.class)).setDefault().toProvider(MoreExecutors::newDirectExecutorService).in(Scopes.SINGLETON);
}
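The binding on the first line of setup is what lets any JDBC connector component receive the catalog's name through plain constructor injection instead of threading a String through every factory. The sketch below shows that consumption pattern in isolation; the module and component classes are hypothetical, and the println assumes CatalogName's toString() prints the bound name.

import com.google.inject.Binder;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.Scopes;
import io.trino.plugin.base.CatalogName;

import javax.inject.Inject;

import static java.util.Objects.requireNonNull;

public final class CatalogNameBindingExample
{
    // Hypothetical consumer; any connector class can receive the bound CatalogName this way.
    public static class CatalogAwareComponent
    {
        private final CatalogName catalogName;

        @Inject
        public CatalogAwareComponent(CatalogName catalogName)
        {
            this.catalogName = requireNonNull(catalogName, "catalogName is null");
        }

        public String describe()
        {
            // relies on CatalogName.toString(); adjust if the accessor differs
            return "component for catalog " + catalogName;
        }
    }

    public static void main(String[] args)
    {
        Module module = (Binder binder) -> {
            binder.bind(CatalogName.class).toInstance(new CatalogName("example_jdbc"));
            binder.bind(CatalogAwareComponent.class).in(Scopes.SINGLETON);
        };
        Injector injector = Guice.createInjector(module);
        System.out.println(injector.getInstance(CatalogAwareComponent.class).describe());
    }
}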
Use of io.trino.plugin.base.CatalogName in project trino by trinodb.
The class InternalDeltaLakeConnectorFactory, method createConnector.
@VisibleForTesting
public static Connector createConnector(String catalogName, Map<String, String> config, ConnectorContext context, Module extraModule) {
ClassLoader classLoader = InternalDeltaLakeConnectorFactory.class.getClassLoader();
try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
Bootstrap app = new Bootstrap(
        new EventModule(), new MBeanModule(), new JsonModule(), new MBeanServerModule(),
        new HiveHdfsModule(), new HiveS3Module(), new HiveAzureModule(), new HdfsAuthenticationModule(),
        new CatalogNameModule(catalogName), new DeltaLakeMetastoreModule(), new DeltaLakeModule(),
        binder -> {
            binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion()));
            binder.bind(NodeManager.class).toInstance(context.getNodeManager());
            binder.bind(TypeManager.class).toInstance(context.getTypeManager());
            binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory());
            binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName));
            newSetBinder(binder, EventListener.class);
        },
        binder -> bindSessionPropertiesProvider(binder, DeltaLakeSessionProperties.class),
        extraModule);
Injector injector = app.doNotInitializeLogging().setRequiredConfigurationProperties(config).initialize();
LifeCycleManager lifeCycleManager = injector.getInstance(LifeCycleManager.class);
ConnectorSplitManager splitManager = injector.getInstance(ConnectorSplitManager.class);
ConnectorPageSourceProvider connectorPageSource = injector.getInstance(ConnectorPageSourceProvider.class);
ConnectorPageSinkProvider connectorPageSink = injector.getInstance(ConnectorPageSinkProvider.class);
ConnectorNodePartitioningProvider connectorDistributionProvider = injector.getInstance(ConnectorNodePartitioningProvider.class);
Set<SessionPropertiesProvider> sessionPropertiesProviders = injector.getInstance(Key.get(new TypeLiteral<Set<SessionPropertiesProvider>>() {
}));
DeltaLakeTableProperties deltaLakeTableProperties = injector.getInstance(DeltaLakeTableProperties.class);
DeltaLakeAnalyzeProperties deltaLakeAnalyzeProperties = injector.getInstance(DeltaLakeAnalyzeProperties.class);
DeltaLakeTransactionManager transactionManager = injector.getInstance(DeltaLakeTransactionManager.class);
Set<EventListener> eventListeners = injector.getInstance(Key.get(new TypeLiteral<Set<EventListener>>() {
})).stream().map(listener -> new ClassLoaderSafeEventListener(listener, classLoader)).collect(toImmutableSet());
Set<Procedure> procedures = injector.getInstance(Key.get(new TypeLiteral<Set<Procedure>>() {
}));
Set<TableProcedureMetadata> tableProcedures = injector.getInstance(Key.get(new TypeLiteral<Set<TableProcedureMetadata>>() {
}));
return new DeltaLakeConnector(
        lifeCycleManager,
        new ClassLoaderSafeConnectorSplitManager(splitManager, classLoader),
        new ClassLoaderSafeConnectorPageSourceProvider(connectorPageSource, classLoader),
        new ClassLoaderSafeConnectorPageSinkProvider(connectorPageSink, classLoader),
        new ClassLoaderSafeNodePartitioningProvider(connectorDistributionProvider, classLoader),
        ImmutableSet.of(), procedures, tableProcedures, sessionPropertiesProviders,
        DeltaLakeSchemaProperties.SCHEMA_PROPERTIES, deltaLakeTableProperties.getTableProperties(), deltaLakeAnalyzeProperties.getAnalyzeProperties(),
        eventListeners, transactionManager);
}
}
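createConnector is static and @VisibleForTesting, so production code reaches it through a ConnectorFactory registered by the plugin. The sketch below shows that delegation under the assumption that the standard io.trino.spi.connector.ConnectorFactory shape (getName plus create(catalogName, config, context)) applies at this version; the factory class name and the "example-delta" connector name are made up for illustration.

import com.google.inject.Module;
import com.google.inject.util.Modules;
import io.trino.plugin.deltalake.InternalDeltaLakeConnectorFactory;
import io.trino.spi.connector.Connector;
import io.trino.spi.connector.ConnectorContext;
import io.trino.spi.connector.ConnectorFactory;

import java.util.Map;

// Hedged sketch of a factory delegating to InternalDeltaLakeConnectorFactory.createConnector;
// the real plugin ships its own factory class.
public class ExampleDeltaLakeConnectorFactory
        implements ConnectorFactory
{
    @Override
    public String getName()
    {
        return "example-delta";
    }

    @Override
    public Connector create(String catalogName, Map<String, String> config, ConnectorContext context)
    {
        // No extra bindings here; tests pass a real Module through this parameter.
        Module extraModule = Modules.EMPTY_MODULE;
        return InternalDeltaLakeConnectorFactory.createConnector(catalogName, config, context, extraModule);
    }
}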
Use of io.trino.plugin.base.CatalogName in project trino by trinodb.
The class InternalIcebergConnectorFactory, method createConnector.
public static Connector createConnector(String catalogName, Map<String, String> config, ConnectorContext context, Module module, Optional<HiveMetastore> metastore, Optional<FileIoProvider> fileIoProvider) {
ClassLoader classLoader = InternalIcebergConnectorFactory.class.getClassLoader();
try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
Bootstrap app = new Bootstrap(
        new EventModule(), new MBeanModule(), new ConnectorObjectNameGeneratorModule(catalogName, "io.trino.plugin.iceberg", "trino.plugin.iceberg"), new JsonModule(),
        new IcebergModule(), new IcebergSecurityModule(), new IcebergCatalogModule(metastore),
        new HiveHdfsModule(), new HiveS3Module(), new HiveGcsModule(), new HiveAzureModule(), new HdfsAuthenticationModule(), new MBeanServerModule(),
        fileIoProvider.<Module>map(provider -> binder -> binder.bind(FileIoProvider.class).toInstance(provider))
                .orElse(binder -> binder.bind(FileIoProvider.class).to(HdfsFileIoProvider.class).in(SINGLETON)),
        binder -> {
            binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion()));
            binder.bind(NodeManager.class).toInstance(context.getNodeManager());
            binder.bind(TypeManager.class).toInstance(context.getTypeManager());
            binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory());
            binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName));
        },
        module);
Injector injector = app.doNotInitializeLogging().setRequiredConfigurationProperties(config).initialize();
LifeCycleManager lifeCycleManager = injector.getInstance(LifeCycleManager.class);
IcebergTransactionManager transactionManager = injector.getInstance(IcebergTransactionManager.class);
ConnectorSplitManager splitManager = injector.getInstance(ConnectorSplitManager.class);
ConnectorPageSourceProvider connectorPageSource = injector.getInstance(ConnectorPageSourceProvider.class);
ConnectorPageSinkProvider pageSinkProvider = injector.getInstance(ConnectorPageSinkProvider.class);
ConnectorNodePartitioningProvider connectorDistributionProvider = injector.getInstance(ConnectorNodePartitioningProvider.class);
Set<SessionPropertiesProvider> sessionPropertiesProviders = injector.getInstance(Key.get(new TypeLiteral<Set<SessionPropertiesProvider>>() {
}));
IcebergTableProperties icebergTableProperties = injector.getInstance(IcebergTableProperties.class);
Set<Procedure> procedures = injector.getInstance(Key.get(new TypeLiteral<Set<Procedure>>() {
}));
Set<TableProcedureMetadata> tableProcedures = injector.getInstance(Key.get(new TypeLiteral<Set<TableProcedureMetadata>>() {
}));
Optional<ConnectorAccessControl> accessControl = injector.getInstance(Key.get(new TypeLiteral<Optional<ConnectorAccessControl>>() {
}));
return new IcebergConnector(
        lifeCycleManager,
        transactionManager,
        new ClassLoaderSafeConnectorSplitManager(splitManager, classLoader),
        new ClassLoaderSafeConnectorPageSourceProvider(connectorPageSource, classLoader),
        new ClassLoaderSafeConnectorPageSinkProvider(pageSinkProvider, classLoader),
        new ClassLoaderSafeNodePartitioningProvider(connectorDistributionProvider, classLoader),
        sessionPropertiesProviders, IcebergSchemaProperties.SCHEMA_PROPERTIES, icebergTableProperties.getTableProperties(),
        accessControl, procedures, tableProcedures);
}
}
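Both connector factories follow the same isolation pattern: capture the plugin's own classloader, swap it onto the current thread with ThreadContextClassLoader while the Guice injector is built, and wrap everything returned to the engine in ClassLoaderSafe* delegates. A small sketch of the classloader-swapping half, using only the try-with-resources idiom already visible above (the Runnable parameter is a placeholder for the bootstrap work):

import io.trino.spi.classloader.ThreadContextClassLoader;

public final class ClassLoaderIsolationExample
{
    private ClassLoaderIsolationExample() {}

    public static void runWithPluginClassLoader(Runnable doWork)
    {
        // Use the classloader that loaded this (plugin) class, not the engine's thread context classloader.
        ClassLoader pluginClassLoader = ClassLoaderIsolationExample.class.getClassLoader();
        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(pluginClassLoader)) {
            // Everything executed here resolves classes against the plugin classloader,
            // which is why the factories build the Guice injector inside this block.
            doWork.run();
        }
    }
}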