Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.
From the class TestHiveFileFormats, method testPageSourceFactory:
private void testPageSourceFactory(
        HivePageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat,
        List<TestColumn> testReadColumns, ConnectorSession session, long fileSize, int rowCount)
        throws IOException
{
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerde());

    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn testReadColumn : testReadColumns) {
        String name = testReadColumn.getBaseName();
        if (!baseColumnNames.contains(name) && !testReadColumn.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(testReadColumn.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream().collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream().collect(Collectors.joining(",")));

    List<HivePartitionKey> partitionKeys = testReadColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    String partitionName = String.join("/", partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(toImmutableList()));
    List<HiveColumnHandle> columnHandles = getColumnHandles(testReadColumns);
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(
            partitionName, partitionKeys, columnHandles, ImmutableList.of(), TableToPartitionMapping.empty(),
            split.getPath(), OptionalInt.empty(), fileSize, Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(sourceFactory), ImmutableSet.of(), new Configuration(false), session,
            split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), fileSize,
            splitProperties, TupleDomain.all(), columnHandles, TESTING_TYPE_MANAGER,
            Optional.empty(), Optional.empty(), false, Optional.empty(), false, NO_ACID_TRANSACTION, columnMappings);
    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testReadColumns, getTypes(columnHandles), rowCount);
}
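For context, a hedged sketch of how this helper might be invoked: the caller supplies the HivePageSourceFactory for the format under test, a FileSplit over the written data file, the expected columns, and a ConnectorSession, typically obtained via getHiveSession. The sourceFactory, split, testColumns, fileSize, and rowCount names below are assumed fixtures prepared elsewhere in the test class, and the storage format is only illustrative.

ConnectorSession session = getHiveSession(new HiveConfig());
// sourceFactory, split, testColumns, fileSize and rowCount are assumed fixtures
testPageSourceFactory(sourceFactory, split, HiveStorageFormat.ORC, testColumns, session, fileSize, rowCount);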
Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.
From the class TestHiveFileFormats, method testOptimizedParquetWriter:
@Test(dataProvider = "rowCount")
public void testOptimizedParquetWriter(int rowCount) throws Exception {
ConnectorSession session = getHiveSession(new HiveConfig(), new ParquetWriterConfig().setParquetOptimizedWriterEnabled(true));
assertTrue(HiveSessionProperties.isParquetOptimizedWriterEnabled(session));
List<TestColumn> testColumns = getTestColumnsSupportedByParquet();
assertThatFileFormat(PARQUET).withSession(session).withColumns(testColumns).withRowsCount(rowCount).withFileWriterFactory(new ParquetFileWriterFactory(HDFS_ENVIRONMENT, new NodeVersion("test-version"), TESTING_TYPE_MANAGER)).isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
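The session above is assembled by getHiveSession from the given configs; an equivalent session can be built directly with TestingConnectorSession, mirroring the pattern used in testMaxReadBytes further below. A sketch (the HiveSessionProperties constructor arguments follow that example):

HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(
        new HiveConfig(),
        new OrcReaderConfig(),
        new OrcWriterConfig(),
        new ParquetReaderConfig(),
        new ParquetWriterConfig().setParquetOptimizedWriterEnabled(true));
ConnectorSession session = TestingConnectorSession.builder()
        .setPropertyMetadata(hiveSessionProperties.getSessionProperties())
        .build();
assertTrue(HiveSessionProperties.isParquetOptimizedWriterEnabled(session));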
Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.
From the class TestBackgroundHiveSplitLoader, method backgroundHiveSplitLoader:
private BackgroundHiveSplitLoader backgroundHiveSplitLoader(List<LocatedFileStatus> files, DirectoryLister directoryLister)
{
    List<HivePartitionMetadata> hivePartitionMetadatas = ImmutableList.of(new HivePartitionMetadata(
            new HivePartition(new SchemaTableName("testSchema", "table_name")), Optional.empty(), TableToPartitionMapping.empty()));
    ConnectorSession connectorSession = getHiveSession(new HiveConfig().setMaxSplitSize(DataSize.of(1, GIGABYTE)));
    return new BackgroundHiveSplitLoader(
            SIMPLE_TABLE, NO_ACID_TRANSACTION, hivePartitionMetadatas, TupleDomain.none(), DynamicFilter.EMPTY,
            new Duration(0, SECONDS), TESTING_TYPE_MANAGER, Optional.empty(), connectorSession,
            new TestingHdfsEnvironment(files), new NamenodeStats(), directoryLister, executor, 2,
            false, false, true, Optional.empty(), Optional.empty());
}
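A hedged sketch of how the loader produced by this helper is typically driven: it is started against a HiveSplitSource and the resulting splits are then drained. The hiveSplitSource and drain helpers named here, as well as testFiles and directoryLister, are assumed fixtures of the test class rather than part of the loader's API.

BackgroundHiveSplitLoader loader = backgroundHiveSplitLoader(testFiles, directoryLister); // testFiles and directoryLister are assumed fixtures
HiveSplitSource splitSource = hiveSplitSource(loader); // assumed helper that wraps the loader in a split source
loader.start(splitSource);
List<String> splitPaths = drain(splitSource); // assumed helper that collects the generated split paths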
Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.
From the class TestOrcPageSourceMemoryTracking, method testMaxReadBytes:
@Test(dataProvider = "rowCount")
public void testMaxReadBytes(int rowCount)
        throws Exception
{
    int maxReadBytes = 1_000;
    HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(
            new HiveConfig(),
            new OrcReaderConfig().setMaxBlockSize(DataSize.ofBytes(maxReadBytes)),
            new OrcWriterConfig(),
            new ParquetReaderConfig(),
            new ParquetWriterConfig());
    ConnectorSession session = TestingConnectorSession.builder()
            .setPropertyMetadata(hiveSessionProperties.getSessionProperties())
            .build();
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();

    // Build a table where every row gets larger, so we can test that the batch size shrinks accordingly
    int numColumns = 5;
    int step = 250;
    ImmutableList.Builder<TestColumn> columnBuilder = ImmutableList.<TestColumn>builder()
            .add(new TestColumn("p_empty_string", javaStringObjectInspector, () -> "", true));
    GrowingTestColumn[] dataColumns = new GrowingTestColumn[numColumns];
    for (int i = 0; i < numColumns; i++) {
        dataColumns[i] = new GrowingTestColumn("p_string" + "_" + i, javaStringObjectInspector, () -> Long.toHexString(random.nextLong()), false, step * (i + 1));
        columnBuilder.add(dataColumns[i]);
    }
    List<TestColumn> testColumns = columnBuilder.build();

    File tempFile = File.createTempFile("trino_test_orc_page_source_max_read_bytes", "orc");
    tempFile.delete();

    TestPreparer testPreparer = new TestPreparer(tempFile.getAbsolutePath(), testColumns, rowCount, rowCount);
    ConnectorPageSource pageSource = testPreparer.newPageSource(stats, session);
    try {
        int positionCount = 0;
        while (true) {
            Page page = pageSource.getNextPage();
            if (pageSource.isFinished()) {
                break;
            }
            assertNotNull(page);
            page = page.getLoadedPage();
            positionCount += page.getPositionCount();
            // ignore the first MAX_BATCH_SIZE rows given the sizes are set when loading the blocks
            if (positionCount > MAX_BATCH_SIZE) {
                // either the block is bounded by maxReadBytes or we just load one single large block
                // an error margin of MAX_BATCH_SIZE / step is needed given the block sizes are increasing
                assertTrue(page.getSizeInBytes() < maxReadBytes * (MAX_BATCH_SIZE / step) || 1 == page.getPositionCount());
            }
        }

        // verify the stats are correctly recorded
        Distribution distribution = stats.getMaxCombinedBytesPerRow().getAllTime();
        assertEquals((int) distribution.getCount(), 1);
        // the block is a VariableWidthBlock that carries valueIsNull and offsets arrays as overhead
        assertEquals((int) distribution.getMax(), Arrays.stream(dataColumns).mapToInt(GrowingTestColumn::getMaxSize).sum() + (Integer.BYTES + Byte.BYTES) * numColumns);
        pageSource.close();
    }
    finally {
        tempFile.delete();
    }
}
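The final assertion is a small piece of arithmetic: each data column contributes its maximum string length plus the per-position overhead of a VariableWidthBlock, namely a 4-byte offset and a 1-byte null flag. A restatement of the expected value, breaking the same expression into named parts (no new behavior):

int dataBytes = Arrays.stream(dataColumns).mapToInt(GrowingTestColumn::getMaxSize).sum();
int blockOverheadPerRow = (Integer.BYTES + Byte.BYTES) * numColumns; // offsets array + valueIsNull array
int expectedMaxCombinedBytesPerRow = dataBytes + blockOverheadPerRow;
assertEquals((int) distribution.getMax(), expectedMaxCombinedBytesPerRow);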
Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.
From the class TestPushProjectionIntoTableScan, method createMockFactory:
private MockConnectorFactory createMockFactory(Map<String, ColumnHandle> assignments, Optional<MockConnectorFactory.ApplyProjection> applyProjection)
{
    List<ColumnMetadata> metadata = assignments.entrySet().stream()
            .map(entry -> new ColumnMetadata(entry.getKey(), ((TpchColumnHandle) entry.getValue()).getType()))
            .collect(toImmutableList());
    MockConnectorFactory.Builder builder = MockConnectorFactory.builder()
            .withListSchemaNames(connectorSession -> ImmutableList.of(TEST_SCHEMA))
            .withListTables((connectorSession, schema) -> TEST_SCHEMA.equals(schema) ? ImmutableList.of(TEST_SCHEMA_TABLE) : ImmutableList.of())
            .withGetColumns(schemaTableName -> metadata)
            .withGetTableProperties((session, tableHandle) -> {
                MockConnectorTableHandle mockTableHandle = (MockConnectorTableHandle) tableHandle;
                if (mockTableHandle.getTableName().getTableName().equals(TEST_TABLE)) {
                    return new ConnectorTableProperties(
                            TupleDomain.all(),
                            Optional.of(new ConnectorTablePartitioning(PARTITIONING_HANDLE, ImmutableList.of(column("col", VARCHAR)))),
                            Optional.empty(),
                            Optional.empty(),
                            ImmutableList.of());
                }
                return new ConnectorTableProperties();
            });
    if (applyProjection.isPresent()) {
        builder = builder.withApplyProjection(applyProjection.get());
    }
    return builder.build();
}
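A hedged usage sketch: the factory returned here is typically registered as a catalog on the test's query runner, with the optional ApplyProjection hook supplying the connector-side projection-pushdown behavior under test. The assignments map and the queryRunner referenced below are assumed fixtures, and the catalog name is only illustrative.

// assignments maps column names to TpchColumnHandles; an assumed fixture
MockConnectorFactory factory = createMockFactory(assignments, Optional.empty());
// e.g. register it so the optimizer rule under test can resolve the mock table:
// queryRunner.createCatalog("mock", factory, ImmutableMap.of());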