Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.
The class SystemSplitManager, method getSplits. One split is created per target node, chosen by the system table's distribution mode; a SINGLE_COORDINATOR table gets exactly one split, pinned to the current coordinator.
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    SystemTableHandle table = (SystemTableHandle) tableHandle;
    TupleDomain<ColumnHandle> constraint = table.getConstraint();
    SystemTable systemTable = tables.getSystemTable(session, table.getSchemaTableName())
            .orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName()));

    Distribution tableDistributionMode = systemTable.getDistribution();
    if (tableDistributionMode == SINGLE_COORDINATOR) {
        HostAddress address = nodeManager.getCurrentNode().getHostAndPort();
        ConnectorSplit split = new SystemSplit(address, constraint);
        return new FixedSplitSource(ImmutableList.of(split));
    }

    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    ImmutableSet.Builder<InternalNode> nodes = ImmutableSet.builder();
    if (tableDistributionMode == ALL_COORDINATORS) {
        nodes.addAll(nodeManager.getCoordinators());
    }
    else if (tableDistributionMode == ALL_NODES) {
        nodes.addAll(nodeManager.getNodes(ACTIVE));
    }
    Set<InternalNode> nodeSet = nodes.build();
    for (InternalNode node : nodeSet) {
        splits.add(new SystemSplit(node.getHostAndPort(), constraint));
    }
    return new FixedSplitSource(splits.build());
}
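For context, a split implementation such as the SystemSplit constructed above mainly tells the engine where it may run. Below is a hedged sketch of a minimal node-pinned split; PinnedNodeSplit and its field are illustrative names, not the real SystemSplit, and only the core ConnectorSplit methods plus JSON bindings are shown.

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import io.trino.spi.HostAddress;
import io.trino.spi.connector.ConnectorSplit;

import java.util.List;

import static java.util.Objects.requireNonNull;

// Hypothetical minimal split pinned to a single node (illustrative, not the real SystemSplit)
public class PinnedNodeSplit
        implements ConnectorSplit
{
    private final HostAddress address;

    @JsonCreator
    public PinnedNodeSplit(@JsonProperty("address") HostAddress address)
    {
        this.address = requireNonNull(address, "address is null");
    }

    @JsonProperty
    public HostAddress getAddress()
    {
        return address;
    }

    @Override
    public boolean isRemotelyAccessible()
    {
        // false tells the scheduler this split must run on one of getAddresses()
        return false;
    }

    @Override
    public List<HostAddress> getAddresses()
    {
        return ImmutableList.of(address);
    }

    @Override
    public Object getInfo()
    {
        return address;
    }
}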
Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.
The class ExampleSplitManager, method getSplits. One split is created per source URI, and the split list is shuffled before it is returned.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle connectorTableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter)
{
    ExampleTableHandle tableHandle = (ExampleTableHandle) connectorTableHandle;
    ExampleTable table = exampleClient.getTable(tableHandle.getSchemaName(), tableHandle.getTableName());
    // this can happen if the table is removed during a query
    if (table == null) {
        throw new TableNotFoundException(tableHandle.toSchemaTableName());
    }

    List<ConnectorSplit> splits = new ArrayList<>();
    for (URI uri : table.getSources()) {
        splits.add(new ExampleSplit(uri.toString()));
    }
    // randomize the order in which splits are handed to the engine
    Collections.shuffle(splits);

    return new FixedSplitSource(splits);
}
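Unlike the node-pinned system splits, each ExampleSplit simply carries a URI that any worker can fetch. A hedged sketch of such a split follows; UriBackedSplit is an illustrative name, and the real ExampleSplit may differ (for example, it could report the URI host as a locality hint).

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import io.trino.spi.HostAddress;
import io.trino.spi.connector.ConnectorSplit;

import java.util.List;

import static java.util.Objects.requireNonNull;

// Hypothetical URI-backed split that any worker may process (illustrative, not the real ExampleSplit)
public class UriBackedSplit
        implements ConnectorSplit
{
    private final String uri;

    @JsonCreator
    public UriBackedSplit(@JsonProperty("uri") String uri)
    {
        this.uri = requireNonNull(uri, "uri is null");
    }

    @JsonProperty
    public String getUri()
    {
        return uri;
    }

    @Override
    public boolean isRemotelyAccessible()
    {
        // the data is fetched over the network, so there is no node preference
        return true;
    }

    @Override
    public List<HostAddress> getAddresses()
    {
        // no locality hint
        return ImmutableList.of();
    }

    @Override
    public Object getInfo()
    {
        return uri;
    }
}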
Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.
The class HiveSplitSource, method getNextBatch. Buffered internal splits are borrowed from an async queue, re-checked against the dynamic filter, trimmed to the target split size, and converted to HiveSplit instances; any split with bytes remaining is reinserted into the queue for a later batch.
@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize)
{
    boolean noMoreSplits;
    State state = stateReference.get();
    switch (state.getKind()) {
        case INITIAL:
            noMoreSplits = false;
            break;
        case NO_MORE_SPLITS:
            noMoreSplits = true;
            break;
        case FAILED:
            return failedFuture(state.getThrowable());
        case CLOSED:
            throw new IllegalStateException("HiveSplitSource is already closed");
        default:
            throw new UnsupportedOperationException();
    }
    OptionalInt bucketNumber = toBucketNumber(partitionHandle);
    // Borrow up to maxSize buffered splits; the callback decides what goes to
    // the engine and reinserts any split that still has unread bytes
    ListenableFuture<List<ConnectorSplit>> future = queues.borrowBatchAsync(bucketNumber, maxSize, internalSplits -> {
        ImmutableList.Builder<InternalHiveSplit> splitsToInsertBuilder = ImmutableList.builder();
        ImmutableList.Builder<ConnectorSplit> resultBuilder = ImmutableList.builder();
        int removedEstimatedSizeInBytes = 0;
        int removedSplitCount = 0;
        for (InternalHiveSplit internalSplit : internalSplits) {
            // Perform one more dynamic filter check immediately before the split is returned to the engine
            if (!internalSplit.getPartitionMatchSupplier().getAsBoolean()) {
                removedEstimatedSizeInBytes += internalSplit.getEstimatedSizeInBytes();
                removedSplitCount++;
                continue;
            }

            // The first few splits use the smaller initial split size so that
            // early tasks can be scheduled quickly
            long maxSplitBytes = maxSplitSize.toBytes();
            if (remainingInitialSplits.get() > 0) {
                if (remainingInitialSplits.getAndDecrement() > 0) {
                    maxSplitBytes = maxInitialSplitSize.toBytes();
                }
            }
            InternalHiveBlock block = internalSplit.currentBlock();
            long splitBytes;
            if (internalSplit.isSplittable()) {
                long remainingBlockBytes = block.getEnd() - internalSplit.getStart();
                if (remainingBlockBytes <= maxSplitBytes) {
                    splitBytes = remainingBlockBytes;
                }
                else if (maxSplitBytes * 2 >= remainingBlockBytes) {
                    // Second to last split in this block; generate two evenly sized splits
                    splitBytes = remainingBlockBytes / 2;
                }
                else {
                    splitBytes = maxSplitBytes;
                }
            }
            else {
                splitBytes = internalSplit.getEnd() - internalSplit.getStart();
            }

            resultBuilder.add(new HiveSplit(
                    databaseName,
                    tableName,
                    internalSplit.getPartitionName(),
                    internalSplit.getPath(),
                    internalSplit.getStart(),
                    splitBytes,
                    internalSplit.getEstimatedFileSize(),
                    internalSplit.getFileModifiedTime(),
                    internalSplit.getSchema(),
                    internalSplit.getPartitionKeys(),
                    block.getAddresses(),
                    internalSplit.getBucketNumber(),
                    internalSplit.getStatementId(),
                    internalSplit.isForceLocalScheduling(),
                    internalSplit.getTableToPartitionMapping(),
                    internalSplit.getBucketConversion(),
                    internalSplit.getBucketValidation(),
                    internalSplit.isS3SelectPushdownEnabled(),
                    internalSplit.getAcidInfo(),
                    numberOfProcessedSplits.getAndIncrement(),
                    splitWeightProvider.weightForSplitSizeInBytes(splitBytes)));

            internalSplit.increaseStart(splitBytes);
            if (internalSplit.isDone()) {
                removedEstimatedSizeInBytes += internalSplit.getEstimatedSizeInBytes();
                removedSplitCount++;
            }
            else {
                splitsToInsertBuilder.add(internalSplit);
            }
        }
        estimatedSplitSizeInBytes.addAndGet(-removedEstimatedSizeInBytes);
        bufferedInternalSplitCount.addAndGet(-removedSplitCount);
        List<InternalHiveSplit> splitsToInsert = splitsToInsertBuilder.build();
        List<ConnectorSplit> result = resultBuilder.build();
        return new AsyncQueue.BorrowResult<>(splitsToInsert, result);
    });
    ListenableFuture<ConnectorSplitBatch> transform = Futures.transform(future, splits -> {
        requireNonNull(splits, "splits is null");
        if (recordScannedFiles) {
            splits.forEach(split -> scannedFilePaths.add(((HiveSplit) split).getPath()));
        }
        if (noMoreSplits) {
            // Only claim this is the last batch once the queue is drained; at worst
            // the caller makes one extra getNextBatch invocation, which likely doesn't matter.
            return new ConnectorSplitBatch(splits, splits.isEmpty() && queues.isFinished(bucketNumber));
        }
        else {
            return new ConnectorSplitBatch(splits, false);
        }
    }, directExecutor());
    return toCompletableFuture(transform);
}
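The sizing rule inside the loop above is easy to misread. The following standalone sketch re-implements just that rule; SplitSizingDemo and computeSplitBytes are illustrative names, not Trino APIs. For a 1,000-byte block and a 300-byte maximum it produces splits of 300, 300, 200, and 200 bytes: the halving branch trades the last full-size split for two even ones instead of leaving a tiny tail.

// Hypothetical re-implementation of the split sizing rule shown above
public class SplitSizingDemo
{
    static long computeSplitBytes(long remainingBlockBytes, long maxSplitBytes)
    {
        if (remainingBlockBytes <= maxSplitBytes) {
            // last split in the block: take whatever is left
            return remainingBlockBytes;
        }
        if (maxSplitBytes * 2 >= remainingBlockBytes) {
            // second to last split: emit two evenly sized splits
            return remainingBlockBytes / 2;
        }
        return maxSplitBytes;
    }

    public static void main(String[] args)
    {
        long blockEnd = 1_000;
        long maxSplitBytes = 300;
        long start = 0;
        while (start < blockEnd) {
            long splitBytes = computeSplitBytes(blockEnd - start, maxSplitBytes);
            System.out.printf("split [%d, %d), %d bytes%n", start, start + splitBytes, splitBytes);
            start += splitBytes;
        }
        // prints splits of 300, 300, 200, and 200 bytes
    }
}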
Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.
The class AbstractTestHive, method doTestBucketSortedTables. The test writes 50,000 rows into a bucketed, sorted Hive table, then reads each bucket's split back and asserts that the rows come out in sort order.
private void doTestBucketSortedTables(SchemaTableName table)
        throws IOException
{
    int bucketCount = 3;
    int expectedRowCount = 0;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();

        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
                table,
                ImmutableList.<ColumnMetadata>builder()
                        .add(new ColumnMetadata("id", VARCHAR))
                        .add(new ColumnMetadata("value_asc", VARCHAR))
                        .add(new ColumnMetadata("value_desc", BIGINT))
                        .add(new ColumnMetadata("ds", VARCHAR))
                        .build(),
                ImmutableMap.<String, Object>builder()
                        .put(STORAGE_FORMAT_PROPERTY, RCBINARY)
                        .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds"))
                        .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id"))
                        .put(BUCKET_COUNT_PROPERTY, bucketCount)
                        .put(SORTED_BY_PROPERTY, ImmutableList.builder()
                                .add(new SortingColumn("value_asc", ASCENDING))
                                .add(new SortingColumn("value_desc", DESCENDING))
                                .build())
                        .buildOrThrow());
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);

        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        List<Type> types = tableMetadata.getColumns().stream()
                .map(ColumnMetadata::getType)
                .collect(toList());
        ThreadLocalRandom random = ThreadLocalRandom.current();
        for (int i = 0; i < 50; i++) {
            MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
            for (int j = 0; j < 1000; j++) {
                builder.row(
                        sha256().hashLong(random.nextLong()).toString(),
                        "test" + random.nextInt(100),
                        random.nextLong(100_000),
                        "2018-04-01");
                expectedRowCount++;
            }
            sink.appendPage(builder.build().toPage());
        }

        HdfsContext context = new HdfsContext(session);
        // verify we have enough temporary files per bucket to require multiple passes
        Path stagingPathRoot;
        if (isTemporaryStagingDirectoryEnabled(session)) {
            stagingPathRoot = new Path(getTemporaryStagingDirectoryPath(session)
                    .replace("${USER}", context.getIdentity().getUser()));
        }
        else {
            stagingPathRoot = getStagingPathRoot(outputHandle);
        }
        assertThat(listAllDataFiles(context, stagingPathRoot))
                .filteredOn(file -> file.contains(".tmp-sort."))
                .size().isGreaterThan(bucketCount * getHiveConfig().getMaxOpenSortFiles() * 2);

        // finish the write
        Collection<Slice> fragments = getFutureValue(sink.finish());

        // verify there are no temporary files
        for (String file : listAllDataFiles(context, stagingPathRoot)) {
            assertThat(file).doesNotContain(".tmp-sort.");
        }

        // finish creating table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // verify that bucket files are sorted
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());

        // verify local sorting property
        ConnectorTableProperties properties = metadata.getTableProperties(
                newSession(ImmutableMap.of(
                        "propagate_table_scan_sorting_properties", true,
                        "bucket_execution_enabled", false)),
                tableHandle);
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        assertEquals(properties.getLocalProperties(), ImmutableList.of(
                new SortingProperty<>(columnHandles.get(columnIndex.get("value_asc")), ASC_NULLS_FIRST),
                new SortingProperty<>(columnHandles.get(columnIndex.get("value_desc")), DESC_NULLS_LAST)));
        assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties()).isEmpty();

        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertThat(splits).hasSize(bucketCount);

        int actualRowCount = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
                String lastValueAsc = null;
                long lastValueDesc = -1;
                while (!pageSource.isFinished()) {
                    Page page = pageSource.getNextPage();
                    if (page == null) {
                        continue;
                    }
                    for (int i = 0; i < page.getPositionCount(); i++) {
                        Block blockAsc = page.getBlock(1);
                        Block blockDesc = page.getBlock(2);
                        assertFalse(blockAsc.isNull(i));
                        assertFalse(blockDesc.isNull(i));
                        String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
                        if (lastValueAsc != null) {
                            assertGreaterThanOrEqual(valueAsc, lastValueAsc);
                            if (valueAsc.equals(lastValueAsc)) {
                                // within a run of equal value_asc, value_desc must be descending
                                long valueDesc = BIGINT.getLong(blockDesc, i);
                                if (lastValueDesc != -1) {
                                    assertLessThanOrEqual(valueDesc, lastValueDesc);
                                }
                                lastValueDesc = valueDesc;
                            }
                            else {
                                lastValueDesc = -1;
                            }
                        }
                        lastValueAsc = valueAsc;
                        actualRowCount++;
                    }
                }
            }
        }
        assertThat(actualRowCount).isEqualTo(expectedRowCount);
    }
}
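getAllSplits in the test above is a helper from the test harness. Below is a hedged sketch of how such a helper might drain a ConnectorSplitSource, assuming the two-argument getNextBatch signature shown in the HiveSplitSource example; SplitSourceUtil and drainSplits are illustrative names, not the actual helper.

import com.google.common.collect.ImmutableList;
import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;

import java.util.List;

import static io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

public final class SplitSourceUtil
{
    private SplitSourceUtil() {}

    // Illustrative helper: block on each batch until the source is drained (fine for tests)
    public static List<ConnectorSplit> drainSplits(ConnectorSplitSource source)
    {
        ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
        while (!source.isFinished()) {
            splits.addAll(source.getNextBatch(NOT_PARTITIONED, 1000).join().getSplits());
        }
        return splits.build();
    }
}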
Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.
The class AbstractTestHiveFileSystem, method createTable. The test creates a table, writes three rows through a page sink, then reads them back through the table's single split and compares the results.
private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat)
        throws Exception
{
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("id", BIGINT))
            .build();

    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT)
            .row(1L)
            .row(3L)
            .row(2L)
            .build();

    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();

        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);

        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());

        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();

        // Hack to work around the metastore not being configured for S3 or other file systems.
        // The metastore tries to validate the location when creating the
        // table, which fails without explicit configuration for the file system.
        // We work around that by using a dummy location when creating the
        // table and update it here to the correct location.
        metastoreClient.updateTableLocation(
                database,
                tableName.getTableName(),
                locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).getTargetPath().toString());
    }

    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();

        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);

        // verify the data
        metadata.beginQuery(session);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
        metadata.cleanupQuery(session);
    }
}
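The getSplits(splitManager, transaction, session, tableHandle) call above is another harness helper. Below is a hedged sketch of what it presumably wraps, using the same five-argument ConnectorSplitManager.getSplits signature as the split manager examples earlier; TestSplitHelper is an illustrative name, not the actual implementation.

import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplitManager;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.connector.DynamicFilter;

import static io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING;

public final class TestSplitHelper
{
    private TestSplitHelper() {}

    // Illustrative stand-in for the harness helper used above
    public static ConnectorSplitSource getSplits(
            ConnectorSplitManager splitManager,
            ConnectorTransactionHandle transaction,
            ConnectorSession session,
            ConnectorTableHandle tableHandle)
    {
        return splitManager.getSplits(transaction, session, tableHandle, UNGROUPED_SCHEDULING, DynamicFilter.EMPTY);
    }
}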