Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.
The class TestOrcPageSourceFactory, method readFile.
private static List<Nation> readFile(Map<NationColumn, Integer> columns, OptionalLong nationKeyPredicate, Optional<AcidInfo> acidInfo, String filePath, long fileSize)
{
    TupleDomain<HiveColumnHandle> tupleDomain = TupleDomain.all();
    if (nationKeyPredicate.isPresent()) {
        tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(toHiveColumnHandle(NATION_KEY, 0), Domain.singleValue(INTEGER, nationKeyPredicate.getAsLong())));
    }

    List<HiveColumnHandle> columnHandles = columns.entrySet().stream().map(entry -> toHiveColumnHandle(entry.getKey(), entry.getValue())).collect(toImmutableList());
    List<String> columnNames = columnHandles.stream().map(HiveColumnHandle::getName).collect(toImmutableList());

    Optional<ReaderPageSource> pageSourceWithProjections = PAGE_SOURCE_FACTORY.createPageSource(new JobConf(new Configuration(false)), SESSION, new Path(filePath), 0, fileSize, fileSize, createSchema(), columnHandles, tupleDomain, acidInfo, OptionalInt.empty(), false, NO_ACID_TRANSACTION);
    checkArgument(pageSourceWithProjections.isPresent());
    checkArgument(pageSourceWithProjections.get().getReaderColumns().isEmpty(), "projected columns not expected here");
    ConnectorPageSource pageSource = pageSourceWithProjections.get().get();

    int nationKeyColumn = columnNames.indexOf("n_nationkey");
    int nameColumn = columnNames.indexOf("n_name");
    int regionKeyColumn = columnNames.indexOf("n_regionkey");
    int commentColumn = columnNames.indexOf("n_comment");

    ImmutableList.Builder<Nation> rows = ImmutableList.builder();
    while (!pageSource.isFinished()) {
        // getNextPage() may return null even though the source is not finished yet
        Page page = pageSource.getNextPage();
        if (page == null) {
            continue;
        }

        // Force lazy blocks to be loaded before reading individual values
        page = page.getLoadedPage();
        for (int position = 0; position < page.getPositionCount(); position++) {
            long nationKey = -42;
            if (nationKeyColumn >= 0) {
                nationKey = BIGINT.getLong(page.getBlock(nationKeyColumn), position);
            }
            String name = "<not read>";
            if (nameColumn >= 0) {
                name = VARCHAR.getSlice(page.getBlock(nameColumn), position).toStringUtf8();
            }
            long regionKey = -42;
            if (regionKeyColumn >= 0) {
                regionKey = BIGINT.getLong(page.getBlock(regionKeyColumn), position);
            }
            String comment = "<not read>";
            if (commentColumn >= 0) {
                comment = VARCHAR.getSlice(page.getBlock(commentColumn), position).toStringUtf8();
            }
            rows.add(new Nation(position, nationKey, name, regionKey, comment));
        }
    }
    return rows.build();
}
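A hedged usage sketch (not taken from the Trino source): invoking this helper to read two of the four nation columns with a predicate on the nation key. The column indices, the file path, and the file size are made-up values, and NAME is assumed to be another NationColumn constant alongside NATION_KEY.

    List<Nation> nations = readFile(
            ImmutableMap.of(NATION_KEY, 0, NAME, 1),  // hypothetical column-to-index mapping
            OptionalLong.of(10),                      // keep only rows with n_nationkey = 10
            Optional.empty(),                         // no ACID info
            "/tmp/nation.orc",                        // hypothetical file path
            1024L);                                   // hypothetical file size in bytes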
The class TestOrcPredicates, method assertFilteredRows.
private void assertFilteredRows(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split, int expectedRows)
{
    ConnectorPageSource pageSource = createPageSource(effectivePredicate, columnsToRead, session, split);

    int filteredRows = 0;
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        if (page != null) {
            filteredRows += page.getPositionCount();
        }
    }
    assertEquals(filteredRows, expectedRows);
}
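For orientation, a hedged example call (not from the test itself); columnsToRead, session, split, and the expected count of 25 rows are placeholder values.

    // With no predicate, every row in the split is expected to survive filtering.
    assertFilteredRows(TupleDomain.all(), columnsToRead, session, split, 25);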
The class TestOrcPredicates, method createPageSource.
private ConnectorPageSource createPageSource(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split)
{
    OrcPageSourceFactory readerFactory = new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC);

    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde());

    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn columnToRead : columnsToRead) {
        String name = columnToRead.getBaseName();
        if (!baseColumnNames.contains(name) && !columnToRead.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(columnToRead.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream().collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream().collect(Collectors.joining(",")));

    List<HivePartitionKey> partitionKeys = columnsToRead.stream().filter(TestColumn::isPartitionKey).map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue())).collect(toList());
    String partitionName = String.join("/", partitionKeys.stream().map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue())).collect(toImmutableList()));
    List<HiveColumnHandle> columnHandles = getColumnHandles(columnsToRead);

    // Rewrite the predicate in terms of the Hive column handles used by the reader
    TupleDomain<HiveColumnHandle> predicate = effectivePredicate.transformKeys(testColumn -> {
        Optional<HiveColumnHandle> handle = columnHandles.stream().filter(column -> testColumn.getName().equals(column.getName())).findFirst();
        checkState(handle.isPresent(), "Predicate on invalid column");
        return handle.get();
    });

    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(partitionName, partitionKeys, columnHandles, ImmutableList.of(), TableToPartitionMapping.empty(), split.getPath(), OptionalInt.empty(), split.getLength(), Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(readerFactory), ImmutableSet.of(), new Configuration(false), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), splitProperties, predicate, columnHandles, TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), false, Optional.empty(), false, NO_ACID_TRANSACTION, columnMappings);
    assertTrue(pageSource.isPresent());
    return pageSource.get();
}
The class DeltaLakeUpdatablePageSource, method copyParquetPageSource.
private DataFileInfo copyParquetPageSource(DeltaLakeWriter fileWriter) throws IOException
{
    ReaderPageSource readerPageSource = createParquetPageSource(TupleDomain.all(), allDataColumns.stream().map(DeltaLakeColumnHandle::toHiveColumnHandle).collect(toImmutableList()));
    ConnectorPageSource connectorPageSource = readerPageSource.get();
    boolean successfulWrite = true;
    try {
        int pageStart = 0;
        while (!connectorPageSource.isFinished()) {
            Page page = connectorPageSource.getNextPage();
            if (page == null) {
                continue;
            }
            int pagePositionCount = page.getPositionCount();
            int nextToDelete = rowsToDelete.nextSetBit(pageStart);
            if (nextToDelete == -1 || nextToDelete >= pageStart + pagePositionCount) {
                // page is wholly retained
            }
            else {
                // Drop deleted rows by keeping only the positions not marked in rowsToDelete
                int[] retainedPositions = new int[pagePositionCount];
                int retainedPositionsCount = 0;
                for (int position = 0; position < pagePositionCount; position++) {
                    if (!rowsToDelete.get(pageStart + position)) {
                        retainedPositions[retainedPositionsCount] = position;
                        retainedPositionsCount++;
                    }
                }
                page = page.getPositions(retainedPositions, 0, retainedPositionsCount);
            }
            fileWriter.appendRows(page);
            pageStart += pagePositionCount;
        }
    }
    catch (Exception e) {
        successfulWrite = false;
        try {
            fileWriter.rollback();
        }
        catch (Exception rollbackException) {
            if (e != rollbackException) {
                e.addSuppressed(rollbackException);
            }
        }
        throw e;
    }
    finally {
        // Commit only if every page was appended successfully; always close the source
        if (successfulWrite) {
            fileWriter.commit();
        }
        connectorPageSource.close();
    }
    return fileWriter.getDataFileInfo();
}
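The retained-positions loop above is the core of the row-level delete: it turns the global rowsToDelete bitmap into page-relative positions. Below is a minimal standalone sketch of that masking step using only java.util.BitSet and toy data; it is an illustration, not code from the Trino source.

    import java.util.Arrays;
    import java.util.BitSet;

    public final class RetainedPositionsSketch
    {
        // Given a page's global starting row and its position count, return the
        // page-relative positions that are NOT marked for deletion in rowsToDelete.
        static int[] retainedPositions(BitSet rowsToDelete, int pageStart, int pagePositionCount)
        {
            int[] retained = new int[pagePositionCount];
            int count = 0;
            for (int position = 0; position < pagePositionCount; position++) {
                if (!rowsToDelete.get(pageStart + position)) {
                    retained[count++] = position;
                }
            }
            return Arrays.copyOf(retained, count);
        }

        public static void main(String[] args)
        {
            BitSet rowsToDelete = new BitSet();
            rowsToDelete.set(3);
            rowsToDelete.set(5);
            // A page covering global rows 0..7 keeps positions [0, 1, 2, 4, 6, 7]
            System.out.println(Arrays.toString(retainedPositions(rowsToDelete, 0, 8)));
        }
    }

In the method above, the equivalent array is handed to Page.getPositions(retainedPositions, 0, retainedPositionsCount), so the unused tail of the array never needs to be trimmed.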
The class TestTimestamp, method testReadingAs.
private void testReadingAs(Type type, ConnectorSession session, ParquetTester.TempFile tempFile, List<String> columnNames, List<?> expectedValues) throws IOException
{
    Iterator<?> expected = expectedValues.iterator();
    try (ConnectorPageSource pageSource = StandardFileFormats.TRINO_PARQUET.createFileFormatReader(session, HDFS_ENVIRONMENT, tempFile.getFile(), columnNames, ImmutableList.of(type))) {
        // skip a page to exercise the decoder's skip() logic
        Page firstPage = pageSource.getNextPage();
        assertTrue(firstPage.getPositionCount() > 0, "Expected first page to have at least 1 row");
        for (int i = 0; i < firstPage.getPositionCount(); i++) {
            expected.next();
        }

        int pageCount = 1;
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page == null) {
                continue;
            }
            pageCount++;
            Block block = page.getBlock(0);
            for (int i = 0; i < block.getPositionCount(); i++) {
                assertThat(type.getObjectValue(session, block, i)).isEqualTo(expected.next());
            }
        }

        assertThat(pageCount).withFailMessage("Expected more than one page but processed %s", pageCount).isGreaterThan(1);
        assertFalse(expected.hasNext(), "Read fewer values than expected");
    }
}