Use of io.trino.plugin.hive.HiveTestUtils.SESSION in project trino by trinodb.
The class AbstractTestHive, method setup:
protected final void setup(String databaseName, HiveConfig hiveConfig, HiveMetastore hiveMetastore, HdfsEnvironment hdfsConfiguration)
{
    setupHive(databaseName);

    metastoreClient = hiveMetastore;
    hdfsEnvironment = hdfsConfiguration;
    HivePartitionManager partitionManager = new HivePartitionManager(hiveConfig);
    locationService = new HiveLocationService(hdfsEnvironment);
    JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
    metadataFactory = new HiveMetadataFactory(
            new CatalogName("hive"),
            HiveMetastoreFactory.ofInstance(metastoreClient),
            hdfsEnvironment,
            partitionManager,
            10,
            10,
            10,
            false,
            false,
            false,
            true,
            true,
            false,
            1000,
            Optional.empty(),
            true,
            TESTING_TYPE_MANAGER,
            NOOP_METADATA_PROVIDER,
            locationService,
            partitionUpdateCodec,
            newFixedThreadPool(2),
            heartbeatService,
            TEST_SERVER_VERSION,
            // test-only redirection hook: redirects any table whose name contains "apply_redirection_tester"
            (session, tableHandle) -> {
                if (!tableHandle.getTableName().contains("apply_redirection_tester")) {
                    return Optional.empty();
                }
                return Optional.of(new TableScanRedirectApplicationResult(
                        new CatalogSchemaTableName("hive", databaseName, "mock_redirection_target"),
                        ImmutableMap.of(),
                        TupleDomain.all()));
            },
            ImmutableSet.of(
                    new PartitionsSystemTableProvider(partitionManager, TESTING_TYPE_MANAGER),
                    new PropertiesSystemTableProvider()),
            // test-only materialized view hook: reports a dummy definition for any view whose name contains "materialized_view_tester"
            metastore -> new NoneHiveMaterializedViewMetadata()
            {
                @Override
                public Optional<ConnectorMaterializedViewDefinition> getMaterializedView(ConnectorSession session, SchemaTableName viewName)
                {
                    if (!viewName.getTableName().contains("materialized_view_tester")) {
                        return Optional.empty();
                    }
                    return Optional.of(new ConnectorMaterializedViewDefinition(
                            "dummy_view_sql",
                            Optional.empty(),
                            Optional.empty(),
                            Optional.empty(),
                            ImmutableList.of(new ConnectorMaterializedViewDefinition.Column("abc", TypeId.of("type"))),
                            Optional.empty(),
                            Optional.of("alice"),
                            ImmutableMap.of()));
                }
            },
            SqlStandardAccessControlMetadata::new,
            NO_REDIRECTIONS,
            TableInvalidationCallback.NOOP);
    transactionManager = new HiveTransactionManager(metadataFactory);
    splitManager = new HiveSplitManager(
            transactionManager,
            partitionManager,
            new NamenodeStats(),
            hdfsEnvironment,
            new CachingDirectoryLister(hiveConfig),
            directExecutor(),
            new CounterStat(),
            100,
            hiveConfig.getMaxOutstandingSplitsSize(),
            hiveConfig.getMinPartitionBatchSize(),
            hiveConfig.getMaxPartitionBatchSize(),
            hiveConfig.getMaxInitialSplits(),
            hiveConfig.getSplitLoaderConcurrency(),
            hiveConfig.getMaxSplitsPerSecond(),
            false,
            TESTING_TYPE_MANAGER);
    pageSinkProvider = new HivePageSinkProvider(
            getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment),
            hdfsEnvironment,
            PAGE_SORTER,
            HiveMetastoreFactory.ofInstance(metastoreClient),
            new GroupByHashPageIndexerFactory(JOIN_COMPILER, BLOCK_TYPE_OPERATORS),
            TESTING_TYPE_MANAGER,
            getHiveConfig(),
            locationService,
            partitionUpdateCodec,
            new TestingNodeManager("fake-environment"),
            new HiveEventClient(),
            getHiveSessionProperties(hiveConfig),
            new HiveWriterStats());
    pageSourceProvider = new HivePageSourceProvider(
            TESTING_TYPE_MANAGER,
            hdfsEnvironment,
            hiveConfig,
            getDefaultHivePageSourceFactories(hdfsEnvironment, hiveConfig),
            getDefaultHiveRecordCursorProviders(hiveConfig, hdfsEnvironment),
            new GenericHiveRecordCursorProvider(hdfsEnvironment, hiveConfig),
            Optional.empty());
    nodePartitioningProvider = new HiveNodePartitioningProvider(
            new TestingNodeManager("fake-environment"),
            TESTING_TYPE_MANAGER);
}
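For orientation, a concrete test class calls setup with its own database name, config, metastore, and HDFS environment. The sketch below shows one plausible wiring; the class name, the @BeforeClass initializer, and the createTestingMetastore helper are assumptions made for illustration, and only setup(...) itself comes from the snippet above.

import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT;

import io.trino.plugin.hive.metastore.HiveMetastore;
import org.testng.annotations.BeforeClass;

// Hypothetical subclass wiring; everything except setup(...) is assumed for the example.
public class TestHiveLocal
        extends AbstractTestHive
{
    @BeforeClass
    public void initialize()
    {
        HiveConfig hiveConfig = new HiveConfig();
        HiveMetastore metastore = createTestingMetastore(); // assumed helper returning an in-memory or file-backed metastore
        setup("test_database", hiveConfig, metastore, HDFS_ENVIRONMENT);
    }

    private static HiveMetastore createTestingMetastore()
    {
        throw new UnsupportedOperationException("placeholder: supply a real test metastore here");
    }
}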
Use of io.trino.plugin.hive.HiveTestUtils.SESSION in project trino by trinodb.
The class AbstractTestHive, method testApplyProjection:
@Test
public void testApplyProjection()
        throws Exception
{
    ColumnMetadata bigIntColumn0 = new ColumnMetadata("int0", BIGINT);
    ColumnMetadata bigIntColumn1 = new ColumnMetadata("int1", BIGINT);

    RowType oneLevelRowType = toRowType(ImmutableList.of(bigIntColumn0, bigIntColumn1));
    ColumnMetadata oneLevelRow0 = new ColumnMetadata("onelevelrow0", oneLevelRowType);

    RowType twoLevelRowType = toRowType(ImmutableList.of(oneLevelRow0, bigIntColumn0, bigIntColumn1));
    ColumnMetadata twoLevelRow0 = new ColumnMetadata("twolevelrow0", twoLevelRowType);

    List<ColumnMetadata> columnsForApplyProjectionTest = ImmutableList.of(bigIntColumn0, bigIntColumn1, oneLevelRow0, twoLevelRow0);

    SchemaTableName tableName = temporaryTable("apply_projection_tester");
    doCreateEmptyTable(tableName, ORC, columnsForApplyProjectionTest);

    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);

        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream()
                .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden())
                .collect(toList());
        assertEquals(columnHandles.size(), columnsForApplyProjectionTest.size());

        Map<String, ColumnHandle> columnHandleMap = columnHandles.stream()
                .collect(toImmutableMap(handle -> ((HiveColumnHandle) handle).getBaseColumnName(), Function.identity()));

        // Emulate symbols coming from the query plan and map them to column handles
        Map<String, ColumnHandle> columnHandlesWithSymbols = ImmutableMap.of(
                "symbol_0", columnHandleMap.get("int0"),
                "symbol_1", columnHandleMap.get("int1"),
                "symbol_2", columnHandleMap.get("onelevelrow0"),
                "symbol_3", columnHandleMap.get("twolevelrow0"));

        // Create variables for the emulated symbols
        Map<String, Variable> symbolVariableMapping = columnHandlesWithSymbols.entrySet().stream()
                .collect(toImmutableMap(
                        Map.Entry::getKey,
                        e -> new Variable(e.getKey(), ((HiveColumnHandle) e.getValue()).getBaseType())));

        // Create dereference expressions for testing
        FieldDereference symbol2Field0 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_2"), 0);
        FieldDereference symbol3Field0 = new FieldDereference(oneLevelRowType, symbolVariableMapping.get("symbol_3"), 0);
        FieldDereference symbol3Field0Field0 = new FieldDereference(BIGINT, symbol3Field0, 0);
        FieldDereference symbol3Field1 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_3"), 1);

        Map<String, ColumnHandle> inputAssignments;
        List<ConnectorExpression> inputProjections;
        Optional<ProjectionApplicationResult<ConnectorTableHandle>> projectionResult;
        List<ConnectorExpression> expectedProjections;
        Map<String, Type> expectedAssignments;

        // Test projected columns pushdown to HiveTableHandle in case of all variable references
        inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_0", "symbol_1"));
        inputProjections = ImmutableList.of(symbolVariableMapping.get("symbol_0"), symbolVariableMapping.get("symbol_1"));
        expectedAssignments = ImmutableMap.of("symbol_0", BIGINT, "symbol_1", BIGINT);
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments);

        // Empty result when the projected column handles are the same as those already present in the table handle
        projectionResult = metadata.applyProjection(session, projectionResult.get().getHandle(), inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, true, ImmutableList.of(), ImmutableMap.of());

        // Extra column handles in HiveTableHandle should get pruned
        projectionResult = metadata.applyProjection(session, ((HiveTableHandle) tableHandle).withProjectedColumns(ImmutableSet.copyOf(columnHandles)), inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments);

        // Test projection pushdown for dereferences
        inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2", "symbol_3"));
        inputProjections = ImmutableList.of(symbol2Field0, symbol3Field0Field0, symbol3Field1);
        expectedAssignments = ImmutableMap.of(
                "onelevelrow0#f_int0", BIGINT,
                "twolevelrow0#f_onelevelrow0#f_int0", BIGINT,
                "twolevelrow0#f_int0", BIGINT);
        expectedProjections = ImmutableList.of(
                new Variable("onelevelrow0#f_int0", BIGINT),
                new Variable("twolevelrow0#f_onelevelrow0#f_int0", BIGINT),
                new Variable("twolevelrow0#f_int0", BIGINT));
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);

        // Test reuse of virtual column handles
        // Round 1: input projections [symbol_2, symbol_2.int0]; a virtual handle is created for symbol_2.int0
        inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2"));
        inputProjections = ImmutableList.of(symbol2Field0, symbolVariableMapping.get("symbol_2"));
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), symbolVariableMapping.get("symbol_2"));
        expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT, "symbol_2", oneLevelRowType);
        assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);

        // Round 2: input projections [symbol_2.int0, onelevelrow0#f_int0]; the virtual handle is reused
        Assignment newlyCreatedColumn = getOnlyElement(projectionResult.get().getAssignments().stream()
                .filter(handle -> handle.getVariable().equals("onelevelrow0#f_int0"))
                .collect(toList()));
        inputAssignments = ImmutableMap.<String, ColumnHandle>builder()
                .putAll(getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2")))
                .put(newlyCreatedColumn.getVariable(), newlyCreatedColumn.getColumn())
                .buildOrThrow();
        inputProjections = ImmutableList.of(symbol2Field0, new Variable("onelevelrow0#f_int0", BIGINT));
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), new Variable("onelevelrow0#f_int0", BIGINT));
        expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT);
        assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);
    }
    finally {
        dropTable(tableName);
    }
}
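The synthesized names in the expectations above follow a simple pattern: each pushed-down dereference appends #f_<fieldName> to its parent's name, so twolevelrow0#f_onelevelrow0#f_int0 is just that rule applied twice. A small self-contained illustration of building such dereference expressions with the Trino SPI (the row type and names here are invented for the example, not part of the test):

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.expression.FieldDereference;
import io.trino.spi.expression.Variable;
import io.trino.spi.type.RowType;

public class DereferenceNamingExample
{
    public static void main(String[] args)
    {
        // r has type row(a bigint, b row(c bigint)), mirroring oneLevelRowType/twoLevelRowType above
        RowType inner = RowType.rowType(RowType.field("c", BIGINT));
        RowType outer = RowType.rowType(RowType.field("a", BIGINT), RowType.field("b", inner));
        Variable r = new Variable("r", outer);

        // r.a: field 0 of r; a connector synthesizing a column for it would name it "r#f_a"
        FieldDereference rA = new FieldDereference(BIGINT, r, 0);
        // r.b.c: a nested dereference, synthesized as "r#f_b#f_c"
        FieldDereference rBC = new FieldDereference(BIGINT, new FieldDereference(inner, r, 1), 0);
        System.out.println(rA + " / " + rBC);
    }
}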
Use of io.trino.plugin.hive.HiveTestUtils.SESSION in project trino by trinodb.
The class AbstractTestHiveFileFormats, method createTestFileTrino:
public static FileSplit createTestFileTrino(
        String filePath,
        HiveStorageFormat storageFormat,
        HiveCompressionCodec compressionCodec,
        List<TestColumn> testColumns,
        ConnectorSession session,
        int numRows,
        HiveFileWriterFactory fileWriterFactory)
{
    // filter out partition keys, which are not written to the file
    testColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());

    List<Type> types = testColumns.stream()
            .map(TestColumn::getType)
            .map(HiveType::valueOf)
            .map(type -> type.getType(TESTING_TYPE_MANAGER))
            .collect(toList());

    PageBuilder pageBuilder = new PageBuilder(types);
    for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
        pageBuilder.declarePosition();
        for (int columnNumber = 0; columnNumber < testColumns.size(); columnNumber++) {
            serializeObject(
                    types.get(columnNumber),
                    pageBuilder.getBlockBuilder(columnNumber),
                    testColumns.get(columnNumber).getWriteValue(),
                    testColumns.get(columnNumber).getObjectInspector(),
                    false);
        }
    }
    Page page = pageBuilder.build();

    JobConf jobConf = new JobConf();
    configureCompression(jobConf, compressionCodec);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", testColumns.stream().map(TestColumn::getName).collect(Collectors.joining(",")));
    tableProperties.setProperty("columns.types", testColumns.stream().map(TestColumn::getType).collect(Collectors.joining(",")));

    Optional<FileWriter> fileWriter = fileWriterFactory.createFileWriter(
            new Path(filePath),
            testColumns.stream().map(TestColumn::getName).collect(toList()),
            StorageFormat.fromHiveStorageFormat(storageFormat),
            tableProperties,
            jobConf,
            session,
            OptionalInt.empty(),
            NO_ACID_TRANSACTION,
            false,
            WriterKind.INSERT);

    FileWriter hiveFileWriter = fileWriter.orElseThrow(() -> new IllegalArgumentException("fileWriterFactory"));
    hiveFileWriter.appendRows(page);
    hiveFileWriter.commit();

    return new FileSplit(new Path(filePath), 0, new File(filePath).length(), new String[0]);
}
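The row loop above is the standard Trino PageBuilder idiom: declare a position for each row, then write one value into each channel's block builder. A minimal self-contained version of that idiom (the column types and values here are invented for the example, not taken from the test):

import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.VarcharType.VARCHAR;

import io.airlift.slice.Slices;
import io.trino.spi.Page;
import io.trino.spi.PageBuilder;
import java.util.List;

public class PageBuilderExample
{
    public static void main(String[] args)
    {
        // One position per row, one value per channel, then build() yields an immutable Page
        PageBuilder pageBuilder = new PageBuilder(List.of(BIGINT, VARCHAR));
        for (int row = 0; row < 3; row++) {
            pageBuilder.declarePosition();
            BIGINT.writeLong(pageBuilder.getBlockBuilder(0), row);
            VARCHAR.writeSlice(pageBuilder.getBlockBuilder(1), Slices.utf8Slice("row-" + row));
        }
        Page page = pageBuilder.build();
        System.out.println(page.getPositionCount()); // prints 3
    }
}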
Use of io.trino.plugin.hive.HiveTestUtils.SESSION in project trino by trinodb.
The class TestHiveFileFormats, method createPageSourceFromCursorProvider:
private ConnectorPageSource createPageSourceFromCursorProvider(
        HiveRecordCursorProvider cursorProvider,
        FileSplit split,
        Properties splitProperties,
        long fileSize,
        List<TestColumn> testReadColumns,
        ConnectorSession session)
{
    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn testReadColumn : testReadColumns) {
        String name = testReadColumn.getBaseName();
        if (!baseColumnNames.contains(name) && !testReadColumn.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(testReadColumn.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream().collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream().collect(Collectors.joining(",")));

    List<HivePartitionKey> partitionKeys = testReadColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    String partitionName = String.join("/", partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(toImmutableList()));

    Configuration configuration = new Configuration(false);
    configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());

    List<HiveColumnHandle> columnHandles = getColumnHandles(testReadColumns);
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(
            partitionName,
            partitionKeys,
            columnHandles,
            ImmutableList.of(),
            TableToPartitionMapping.empty(),
            split.getPath(),
            OptionalInt.empty(),
            fileSize,
            Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(),
            ImmutableSet.of(cursorProvider),
            configuration,
            session,
            split.getPath(),
            OptionalInt.empty(),
            split.getStart(),
            split.getLength(),
            fileSize,
            splitProperties,
            TupleDomain.all(),
            columnHandles,
            TESTING_TYPE_MANAGER,
            Optional.empty(),
            Optional.empty(),
            false,
            Optional.empty(),
            false,
            NO_ACID_TRANSACTION,
            columnMappings);
    return pageSource.get();
}
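Note how the partition name is rebuilt from the partition keys in Hive's key=value path form before the column mappings are computed. A plain-JDK illustration of that formatting step (the key names and values are invented for the example):

import static java.lang.String.format;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class PartitionNameExample
{
    public static void main(String[] args)
    {
        // Same formatting as above: key=value segments joined with "/"
        List<Map.Entry<String, String>> partitionKeys = List.of(
                Map.entry("ds", "2024-01-01"),
                Map.entry("region", "us"));
        String partitionName = partitionKeys.stream()
                .map(e -> format("%s=%s", e.getKey(), e.getValue()))
                .collect(Collectors.joining("/"));
        System.out.println(partitionName); // prints ds=2024-01-01/region=us
    }
}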
Use of io.trino.plugin.hive.HiveTestUtils.SESSION in project trino by trinodb.
The class TestHiveFileFormats, method testPageSourceFactory:
private void testPageSourceFactory(
        HivePageSourceFactory sourceFactory,
        FileSplit split,
        HiveStorageFormat storageFormat,
        List<TestColumn> testReadColumns,
        ConnectorSession session,
        long fileSize,
        int rowCount)
        throws IOException
{
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerde());

    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn testReadColumn : testReadColumns) {
        String name = testReadColumn.getBaseName();
        if (!baseColumnNames.contains(name) && !testReadColumn.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(testReadColumn.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream().collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream().collect(Collectors.joining(",")));

    List<HivePartitionKey> partitionKeys = testReadColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    String partitionName = String.join("/", partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(toImmutableList()));

    List<HiveColumnHandle> columnHandles = getColumnHandles(testReadColumns);
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(
            partitionName,
            partitionKeys,
            columnHandles,
            ImmutableList.of(),
            TableToPartitionMapping.empty(),
            split.getPath(),
            OptionalInt.empty(),
            fileSize,
            Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(sourceFactory),
            ImmutableSet.of(),
            new Configuration(false),
            session,
            split.getPath(),
            OptionalInt.empty(),
            split.getStart(),
            split.getLength(),
            fileSize,
            splitProperties,
            TupleDomain.all(),
            columnHandles,
            TESTING_TYPE_MANAGER,
            Optional.empty(),
            Optional.empty(),
            false,
            Optional.empty(),
            false,
            NO_ACID_TRANSACTION,
            columnMappings);
    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testReadColumns, getTypes(columnHandles), rowCount);
}
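checkPageSource (not shown here) drains the page source and verifies the decoded values against the expected rows. The consumption loop for a ConnectorPageSource generally looks like the following sketch, which illustrates the interface contract rather than the test's actual helper:

import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSource;

final class PageSourceDrain
{
    private PageSourceDrain() {}

    // Drain a page source and return the total number of rows read.
    static long countRows(ConnectorPageSource pageSource)
    {
        long rows = 0;
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page == null) {
                continue; // source not ready yet; a real caller may block or yield here
            }
            rows += page.getPositionCount();
        }
        return rows;
    }
}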