Example 6 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class AbstractTestHive, method testGetRecords.

@Test
public void testGetRecords() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertEquals(splits.size(), tablePartitionFormatPartitions.size());
        for (ConnectorSplit split : splits) {
            HiveSplit hiveSplit = (HiveSplit) split;
            List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
            String ds = partitionKeys.get(0).getValue();
            String fileFormat = partitionKeys.get(1).getValue();
            HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH));
            int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue());
            long rowNumber = 0;
            long completedBytes = 0;
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                assertPageSourceType(pageSource, fileType);
                for (MaterializedRow row : result) {
                    try {
                        assertValueTypes(row, tableMetadata.getColumns());
                    } catch (RuntimeException e) {
                        throw new RuntimeException("row " + rowNumber, e);
                    }
                    rowNumber++;
                    Object value;
                    value = row.getField(columnIndex.get("t_string"));
                    if (rowNumber % 19 == 0) {
                        assertNull(value);
                    } else if (rowNumber % 19 == 1) {
                        assertEquals(value, "");
                    } else {
                        assertEquals(value, "test");
                    }
                    assertEquals(row.getField(columnIndex.get("t_tinyint")), (byte) (1 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_smallint")), (short) (2 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_int")), 3 + (int) rowNumber);
                    if (rowNumber % 13 == 0) {
                        assertNull(row.getField(columnIndex.get("t_bigint")));
                    } else {
                        assertEquals(row.getField(columnIndex.get("t_bigint")), 4 + rowNumber);
                    }
                    assertEquals((Float) row.getField(columnIndex.get("t_float")), 5.1f + rowNumber, 0.001);
                    assertEquals(row.getField(columnIndex.get("t_double")), 6.2 + rowNumber);
                    if (rowNumber % 3 == 2) {
                        assertNull(row.getField(columnIndex.get("t_boolean")));
                    } else {
                        assertEquals(row.getField(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
                    }
                    assertEquals(row.getField(columnIndex.get("ds")), ds);
                    assertEquals(row.getField(columnIndex.get("file_format")), fileFormat);
                    assertEquals(row.getField(columnIndex.get("dummy")), dummyPartition);
                    long newCompletedBytes = pageSource.getCompletedBytes();
                    assertTrue(newCompletedBytes >= completedBytes);
                    assertTrue(newCompletedBytes <= hiveSplit.getLength());
                    completedBytes = newCompletedBytes;
                }
                assertTrue(completedBytes <= hiveSplit.getLength());
                assertEquals(rowNumber, 100);
            }
        }
    }
}
Also used: HiveColumnHandle.bucketColumnHandle (io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle), ColumnHandle (io.trino.spi.connector.ColumnHandle), ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource), Constraint (io.trino.spi.connector.Constraint), ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle), StorageFormat.fromHiveStorageFormat (io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat), ConnectorSession (io.trino.spi.connector.ConnectorSession), TestingConnectorSession (io.trino.testing.TestingConnectorSession), ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata), MaterializedResult (io.trino.testing.MaterializedResult), ConnectorSplit (io.trino.spi.connector.ConnectorSplit), ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata), MaterializedRow (io.trino.testing.MaterializedRow), Test (org.testng.annotations.Test)
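
Both this example and the next rely on a getAllSplits helper that is not shown on this page. As a minimal sketch, assuming the standard ConnectorSplitSource draining pattern (the same pattern Example 10 below uses), such a helper could look like the following; the method name drainSplits, the batch size of 1000, and the statically imported NOT_PARTITIONED handle are illustrative assumptions, not the actual AbstractTestHive code:

private static List<ConnectorSplit> drainSplits(ConnectorSplitSource splitSource)
        throws ExecutionException, InterruptedException
{
    // Poll batches until the source reports completion; NOT_PARTITIONED is the partition
    // handle for ungrouped scheduling (io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED).
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        splits.addAll(splitSource.getNextBatch(NOT_PARTITIONED, 1000).get().getSplits());
    }
    return splits.build();
}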

Example 7 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class AbstractTestHive, method assertTableIsBucketed.

private void assertTableIsBucketed(ConnectorTableHandle tableHandle, Transaction transaction, ConnectorSession session) {
    // the bucketed test tables should have ~32 splits
    List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
    assertThat(splits.size()).as("splits.size()").isBetween(31, 32);
    // verify all paths are unique
    Set<String> paths = new HashSet<>();
    for (ConnectorSplit split : splits) {
        assertTrue(paths.add(((HiveSplit) split).getPath()));
    }
}
Also used: ConnectorSplit (io.trino.spi.connector.ConnectorSplit), HashSet (java.util.HashSet)
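
The same uniqueness check can also be written as a single stream assertion; an illustrative alternative, not the original code:

// If every path is unique, the distinct path count equals the split count.
assertEquals(
        splits.stream().map(split -> ((HiveSplit) split).getPath()).distinct().count(),
        (long) splits.size());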

Example 8 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class TestHiveSplitSource, method testCorrectlyGeneratingInitialRowId.

@Test
public void testCorrectlyGeneratingInitialRowId() {
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(SESSION, "database", "table", 10, 10, DataSize.of(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), false);
    // add 10 splits
    for (int i = 0; i < 10; i++) {
        hiveSplitSource.addToQueue(new TestSplit(i));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), i + 1);
    }
    List<ConnectorSplit> splits = getSplits(hiveSplitSource, 10);
    assertEquals(((HiveSplit) splits.get(0)).getSplitNumber(), 0);
    assertEquals(((HiveSplit) splits.get(5)).getSplitNumber(), 5);
    assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), 0);
}
Also used: CounterStat (io.airlift.stats.CounterStat), ConnectorSplit (io.trino.spi.connector.ConnectorSplit), Test (org.testng.annotations.Test)
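
Since the source assigns split numbers sequentially from zero, the spot checks above generalize to every split in the batch. A hypothetical extension of the same assertion, not part of the original test:

// Hypothetical follow-up check: split numbers are assigned consecutively from 0.
for (int i = 0; i < splits.size(); i++) {
    assertEquals(((HiveSplit) splits.get(i)).getSplitNumber(), i);
}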

Example 9 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class CassandraSplitManager, method getSplitsForPartitions.

private List<ConnectorSplit> getSplitsForPartitions(CassandraTableHandle cassTableHandle, List<CassandraPartition> partitions, String clusteringPredicates) {
    String schema = cassTableHandle.getSchemaName();
    HostAddressFactory hostAddressFactory = new HostAddressFactory();
    ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder();
    // For a table with a single partition key column, multiple partitions can be merged into one split
    // by using an IN clause in a single SELECT query, provided the partitions share the same host list.
    // For a table with multiple partition key columns, the partitions cannot be merged into a single
    // SELECT query, so each one is kept in its own split.
    boolean singlePartitionKeyColumn = true;
    String partitionKeyColumnName = null;
    if (!partitions.isEmpty()) {
        singlePartitionKeyColumn = partitions.get(0).getTupleDomain().getDomains().get().size() == 1;
        if (singlePartitionKeyColumn) {
            String partitionId = partitions.get(0).getPartitionId();
            partitionKeyColumnName = partitionId.substring(0, partitionId.lastIndexOf('=') - 1);
        }
    }
    Map<Set<String>, Set<String>> hostsToPartitionKeys = new HashMap<>();
    Map<Set<String>, List<HostAddress>> hostMap = new HashMap<>();
    for (CassandraPartition cassandraPartition : partitions) {
        Set<Host> hosts = cassandraSession.getReplicas(schema, cassandraPartition.getKeyAsByteBuffer());
        List<HostAddress> addresses = hostAddressFactory.toHostAddressList(hosts);
        if (singlePartitionKeyColumn) {
            // host IP addresses for this partition's replicas
            ImmutableSet.Builder<String> sb = ImmutableSet.builder();
            for (HostAddress address : addresses) {
                sb.add(address.getHostText());
            }
            Set<String> hostAddresses = sb.build();
            // partition key values
            Set<String> values = hostsToPartitionKeys.get(hostAddresses);
            if (values == null) {
                values = new HashSet<>();
            }
            String partitionId = cassandraPartition.getPartitionId();
            values.add(partitionId.substring(partitionId.lastIndexOf('=') + 2));
            hostsToPartitionKeys.put(hostAddresses, values);
            hostMap.put(hostAddresses, addresses);
        } else {
            builder.add(createSplitForClusteringPredicates(cassandraPartition.getPartitionId(), addresses, clusteringPredicates));
        }
    }
    if (singlePartitionKeyColumn) {
        for (Map.Entry<Set<String>, Set<String>> entry : hostsToPartitionKeys.entrySet()) {
            StringBuilder sb = new StringBuilder(partitionSizeForBatchSelect);
            int size = 0;
            for (String value : entry.getValue()) {
                if (size > 0) {
                    sb.append(",");
                }
                sb.append(value);
                size++;
                if (size > partitionSizeForBatchSelect) {
                    String partitionId = format("%s in (%s)", partitionKeyColumnName, sb);
                    builder.add(createSplitForClusteringPredicates(partitionId, hostMap.get(entry.getKey()), clusteringPredicates));
                    size = 0;
                    sb.setLength(0);
                    sb.trimToSize();
                }
            }
            if (size > 0) {
                String partitionId = format("%s in (%s)", partitionKeyColumnName, sb);
                builder.add(createSplitForClusteringPredicates(partitionId, hostMap.get(entry.getKey()), clusteringPredicates));
            }
        }
    }
    return builder.build();
}
Also used: HostAddressFactory (io.trino.plugin.cassandra.util.HostAddressFactory), HashSet (java.util.HashSet), ImmutableSet (com.google.common.collect.ImmutableSet), Set (java.util.Set), HashMap (java.util.HashMap), ImmutableList (com.google.common.collect.ImmutableList), Host (com.datastax.driver.core.Host), HostAddress (io.trino.spi.HostAddress), List (java.util.List), ConnectorSplit (io.trino.spi.connector.ConnectorSplit), Map (java.util.Map)
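
The substring arithmetic in getSplitsForPartitions implies partition IDs of the form "key = value", with one space on each side of the equals sign: lastIndexOf('=') - 1 trims back to the key, and lastIndexOf('=') + 2 skips past "= " to the value. A standalone illustration, using a hypothetical partition ID:

public class PartitionIdFormatExample {
    public static void main(String[] args) {
        // Hypothetical partition ID; the "key = value" layout is inferred from the offsets above.
        String partitionId = "user_id = 42";
        String key = partitionId.substring(0, partitionId.lastIndexOf('=') - 1);   // "user_id"
        String value = partitionId.substring(partitionId.lastIndexOf('=') + 2);    // "42"
        System.out.println(key + " -> " + value);
    }
}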

Example 10 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class TestDeltaLakeSplitManager, method getSplits.

private List<DeltaLakeSplit> getSplits(DeltaLakeSplitManager splitManager, DeltaLakeConfig deltaLakeConfig) throws ExecutionException, InterruptedException {
    ConnectorSplitSource splitSource = splitManager.getSplits(
            // transaction, session, table handle, split scheduling strategy, dynamic filter, constraint
            new HiveTransactionHandle(false),
            testingConnectorSessionWithConfig(deltaLakeConfig),
            tableHandle,
            ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING,
            DynamicFilter.EMPTY,
            Constraint.alwaysTrue());
    ImmutableList.Builder<DeltaLakeSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        List<ConnectorSplit> nextBatch = splitSource.getNextBatch(NOT_PARTITIONED, 10).get().getSplits();
        splits.addAll(nextBatch.stream().map(split -> (DeltaLakeSplit) split).collect(Collectors.toList()));
    }
    return splits.build();
}
Also used: ImmutableList (com.google.common.collect.ImmutableList), ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource), HiveTransactionHandle (io.trino.plugin.hive.HiveTransactionHandle), ConnectorSplit (io.trino.spi.connector.ConnectorSplit)

Aggregations

ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 44 uses
ColumnHandle (io.trino.spi.connector.ColumnHandle): 21 uses
ImmutableList (com.google.common.collect.ImmutableList): 19 uses
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 18 uses
ConnectorSession (io.trino.spi.connector.ConnectorSession): 17 uses
List (java.util.List): 14 uses
Test (org.testng.annotations.Test): 14 uses
ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource): 13 uses
ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 13 uses
FixedSplitSource (io.trino.spi.connector.FixedSplitSource): 12 uses
ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource): 11 uses
Objects.requireNonNull (java.util.Objects.requireNonNull): 11 uses
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 9 uses
Inject (javax.inject.Inject): 9 uses
HostAddress (io.trino.spi.HostAddress): 8 uses
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 7 uses
Constraint (io.trino.spi.connector.Constraint): 7 uses
DynamicFilter (io.trino.spi.connector.DynamicFilter): 7 uses
MaterializedResult (io.trino.testing.MaterializedResult): 7 uses
ArrayList (java.util.ArrayList): 7 uses