Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb: class TestRubixCaching, method getCachingFileSystem.
private FileSystem getCachingFileSystem(HdfsContext context, Path path) throws IOException {
    HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of());
    HiveHdfsConfiguration configuration = new HiveHdfsConfiguration(
            configurationInitializer,
            ImmutableSet.of(
                    rubixConfigInitializer,
                    (dynamicConfig, ignoredContext, ignoredUri) -> {
                        dynamicConfig.set("fs.file.impl", CachingLocalFileSystem.class.getName());
                        dynamicConfig.setBoolean("fs.gs.lazy.init.enable", true);
                        dynamicConfig.set("fs.azure.account.key", "Zm9vCg==");
                        dynamicConfig.set("fs.adl.oauth2.client.id", "test");
                        dynamicConfig.set("fs.adl.oauth2.refresh.url", "http://localhost");
                        dynamicConfig.set("fs.adl.oauth2.credential", "password");
                    }));
    HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    return environment.getFileSystem(context, path);
}
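For context, a minimal sketch of how such a helper might be exercised in a test. The identity, file path, and read below are illustrative assumptions, not part of the original snippet:

// Illustrative usage only; the identity and file are made up for the sketch.
HdfsContext context = new HdfsContext(ConnectorIdentity.ofUser("test"));  // assumed test identity
Path file = new Path("file:///tmp/rubix-cache-test/data.orc");            // hypothetical local file
FileSystem cachingFileSystem = getCachingFileSystem(context, file);
try (FSDataInputStream input = cachingFileSystem.open(file)) {
    input.readFully(new byte[16]); // reads are served through the Rubix caching layer
}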
Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb: class DeltaLakeMetadata, method createTable.
@Override
public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Database schema = metastore.getDatabase(schemaName).orElseThrow(() -> new SchemaNotFoundException(schemaName));
    boolean external = true;
    String location = getLocation(tableMetadata.getProperties());
    if (location == null) {
        Optional<String> schemaLocation = getSchemaLocation(schema);
        if (schemaLocation.isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "The 'location' property must be specified either for the table or the schema");
        }
        location = new Path(schemaLocation.get(), tableName).toString();
        checkPathContainsNoFiles(session, new Path(location));
        external = false;
    }
    Path targetPath = new Path(location);
    ensurePathExists(session, targetPath);
    Path deltaLogDirectory = getTransactionLogDir(targetPath);
    Optional<Long> checkpointInterval = DeltaLakeTableProperties.getCheckpointInterval(tableMetadata.getProperties());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), targetPath);
        if (!fileSystem.exists(deltaLogDirectory)) {
            validateTableColumns(tableMetadata);
            List<String> partitionColumns = getPartitionedBy(tableMetadata.getProperties());
            List<DeltaLakeColumnHandle> deltaLakeColumns = tableMetadata.getColumns().stream()
                    .map(column -> toColumnHandle(column, partitionColumns))
                    .collect(toImmutableList());
            TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriterWithoutTransactionIsolation(session, targetPath.toString());
            appendInitialTableEntries(transactionLogWriter, deltaLakeColumns, partitionColumns, buildDeltaMetadataConfiguration(checkpointInterval), CREATE_TABLE_OPERATION, session, nodeVersion, nodeId);
            setRollback(() -> deleteRecursivelyIfExists(new HdfsContext(session), hdfsEnvironment, deltaLogDirectory));
            transactionLogWriter.flush();
        }
    }
    catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Unable to access file system for: " + location, e);
    }
    Table.Builder tableBuilder = Table.builder()
            .setDatabaseName(schemaName)
            .setTableName(tableName)
            .setOwner(Optional.of(session.getUser()))
            .setTableType(external ? EXTERNAL_TABLE.name() : MANAGED_TABLE.name())
            .setDataColumns(DUMMY_DATA_COLUMNS)
            .setParameters(deltaTableProperties(session, location, external));
    setDeltaStorageFormat(tableBuilder, location, targetPath);
    Table table = tableBuilder.build();
    PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
    metastore.createTable(session, table, principalPrivileges);
}
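The rollback registered above calls a deleteRecursivelyIfExists helper that is not shown in this snippet. A minimal sketch of what such a helper might look like, using only standard Hadoop FileSystem calls; this is an assumption, not necessarily the project's actual implementation:

// Hypothetical sketch: best-effort recursive cleanup of a path via HdfsEnvironment.
private static void deleteRecursivelyIfExists(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path path)
{
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, path);
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true); // true = delete recursively
        }
    }
    catch (IOException e) {
        // rollback cleanup is best effort; a real implementation would likely log the failure
    }
}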
Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb: class DeltaLakePageSourceProvider, method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    DeltaLakeSplit split = (DeltaLakeSplit) connectorSplit;
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) connectorTable;
    // We reach here when we could not prune the split using file-level stats, the table predicate
    // and the dynamic filter in the coordinator during split generation. The file-level stats
    // in DeltaLakeSplit#filePredicate could help to prune this split when a more selective dynamic filter
    // is available now, without having to access the Parquet file footer for row-group stats.
    // We avoid sending DeltaLakeSplit#splitPredicate to workers by using table.getPredicate() here.
    TupleDomain<DeltaLakeColumnHandle> filteredSplitPredicate = TupleDomain.intersect(ImmutableList.of(
            table.getNonPartitionConstraint(),
            split.getStatisticsPredicate(),
            dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
    if (filteredSplitPredicate.isNone()) {
        return new EmptyPageSource();
    }
    List<DeltaLakeColumnHandle> deltaLakeColumns = columns.stream()
            .map(DeltaLakeColumnHandle.class::cast)
            .collect(toImmutableList());
    Map<String, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<DeltaLakeColumnHandle> regularColumns = deltaLakeColumns.stream()
            .filter(column -> column.getColumnType() == REGULAR)
            .collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = regularColumns.stream()
            .map(DeltaLakeColumnHandle::toHiveColumnHandle)
            .collect(toImmutableList());
    Path path = new Path(split.getPath());
    HdfsContext hdfsContext = new HdfsContext(session);
    TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(filteredSplitPredicate.simplify(domainCompactionThreshold));
    if (table.getWriteType().isPresent()) {
        return new DeltaLakeUpdatablePageSource(table, deltaLakeColumns, partitionKeys, split.getPath(), split.getFileSize(), split.getFileModifiedTime(), session, executorService, hdfsEnvironment, hdfsContext, parquetDateTimeZone, parquetReaderOptions, parquetPredicate, typeManager, updateResultJsonCodec);
    }
    ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource(
            path,
            split.getStart(),
            split.getLength(),
            split.getFileSize(),
            hiveColumnHandles,
            parquetPredicate,
            true,
            hdfsEnvironment,
            hdfsEnvironment.getConfiguration(hdfsContext, path),
            session.getIdentity(),
            parquetDateTimeZone,
            fileFormatDataSourceStats,
            parquetReaderOptions.withMaxReadBlockSize(getParquetMaxReadBlockSize(session)).withUseColumnIndex(isParquetUseColumnIndex(session)));
    verify(pageSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");
    return new DeltaLakePageSource(deltaLakeColumns, partitionKeys, pageSource.get(), split.getPath(), split.getFileSize(), split.getFileModifiedTime());
}
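The getParquetTupleDomain helper is referenced above but not shown. One plausible shape for it, offered as an assumption rather than the actual Trino code, is to re-key the Delta Lake predicate by Hive column handles while skipping complex types, since Parquet keeps row-group statistics only for primitive columns:

// Sketch under assumptions: translate the Delta Lake predicate into one keyed by HiveColumnHandle.
private static TupleDomain<HiveColumnHandle> getParquetTupleDomain(TupleDomain<DeltaLakeColumnHandle> effectivePredicate)
{
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<HiveColumnHandle, Domain> predicate = ImmutableMap.builder();
    effectivePredicate.getDomains().orElseThrow().forEach((column, domain) -> {
        String baseType = column.getType().getTypeSignature().getBase();
        // Skip MAP/ARRAY/ROW: Parquet footers have no usable stats for complex types.
        if (!baseType.equals(StandardTypes.MAP) && !baseType.equals(StandardTypes.ARRAY) && !baseType.equals(StandardTypes.ROW)) {
            predicate.put(column.toHiveColumnHandle(), domain);
        }
    });
    return TupleDomain.withColumnDomains(predicate.buildOrThrow());
}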
Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb: class TestIcebergTableWithExternalLocation, method createQueryRunner.
@Override
protected DistributedQueryRunner createQueryRunner() throws Exception {
    metastoreDir = Files.createTempDirectory("test_iceberg").toFile();
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileHiveMetastoreConfig config = new FileHiveMetastoreConfig()
            .setCatalogDirectory(metastoreDir.toURI().toString())
            .setMetastoreUser("test");
    hdfsContext = new HdfsContext(ConnectorIdentity.ofUser(config.getMetastoreUser()));
    metastore = new FileHiveMetastore(new NodeVersion("testversion"), hdfsEnvironment, new MetastoreConfig(), config);
    return createIcebergQueryRunner(ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(), Optional.of(metastoreDir));
}
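Note that this example wires FileHiveMetastore and its HdfsEnvironment by hand, while the TestIcebergV2 example below reaches an equivalent setup through the createTestingFileHiveMetastore helper (see the sketch after that example).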
Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb: class TestIcebergV2, method createQueryRunner.
@Override
protected QueryRunner createQueryRunner() throws Exception {
    HdfsConfig config = new HdfsConfig();
    HdfsConfiguration configuration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config), ImmutableSet.of());
    hdfsEnvironment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    tempDir = Files.createTempDirectory("test_iceberg_v2");
    metastoreDir = tempDir.resolve("iceberg_data").toFile();
    metastore = createTestingFileHiveMetastore(metastoreDir);
    return createIcebergQueryRunner(ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(NATION), Optional.of(metastoreDir));
}
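createTestingFileHiveMetastore is a test convenience factory. As a hedged sketch based on the manual wiring shown in the TestIcebergTableWithExternalLocation example above (an assumption, not necessarily the exact helper), it bundles the same HdfsEnvironment and FileHiveMetastore construction behind a single call:

// Hedged sketch: roughly the wiring the helper is assumed to encapsulate.
private static FileHiveMetastore createTestingFileHiveMetastore(File catalogDirectory)
{
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    return new FileHiveMetastore(
            new NodeVersion("testversion"),
            hdfsEnvironment,
            new MetastoreConfig(),
            new FileHiveMetastoreConfig()
                    .setCatalogDirectory(catalogDirectory.toURI().toString())
                    .setMetastoreUser("test"));
}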