Search in sources :

Example 36 with ConnectorSplitSource

use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.

the class CarbondataSplitManager method getSplitsForCompaction.

/*
     * Convert the splits into batches based on task id and wrap them in a ConnectorSplitSource to send back
     */
public ConnectorSplitSource getSplitsForCompaction(HiveIdentity identity, ConnectorTransactionHandle transactionHandle, ConnectorTableHandle tableHandle, String tablePath, Map<String, Object> queryProperties, String queryId, ImmutableList.Builder<ConnectorSplit> allSplitsForComp, Configuration configuration) throws PrestoException {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTable.getSchemaTableName();
    List<List<LoadMetadataDetails>> allGroupedSegList;
    // Step 1: Get table handles and metadata
    SemiTransactionalHiveMetastore metaStore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metaStore.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    Properties hiveSchema = MetastoreUtil.getHiveSchema(table);
    CarbonLoadModel carbonLoadModel = null;
    try {
        carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, configuration);
    } catch (Exception e) {
        LOGGER.error("Cannot create carbon load model", e);
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Cannot create carbon load model", e);
    }
    CompactionType compactionType = Boolean.TRUE.equals(queryProperties.get("FULL")) ? CompactionType.MAJOR : CompactionType.MINOR;
    // Step 2: Get segments to be merged based on configuration passed
    allGroupedSegList = CarbondataHetuCompactorUtil.identifyAndGroupSegmentsToBeMerged(carbonLoadModel, configuration, compactionType, carbondataConfig.getMajorVacuumSegSize(), carbondataConfig.getMinorVacuumSegCount());
    // All the splits are grouped based on taskIds and compaction level into one builder
    ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
    Gson gson = new Gson();
    for (List<LoadMetadataDetails> segmentsToBeMerged : allGroupedSegList) {
        String mergedLoadName = CarbonDataMergerUtil.getMergedLoadName(segmentsToBeMerged);
        // Step 3:  Get all the splits for the required segments and divide them based on task ids
        Map<String, List<CarbondataLocalInputSplit>> taskIdToSplitMapping = new HashMap<>();
        for (ConnectorSplit connectorSplit : allSplitsForComp.build()) {
            HiveSplit currSplit = ((HiveSplitWrapper) connectorSplit).getSplits().get(0);
            CarbondataLocalMultiBlockSplit currSplits = gson.fromJson(currSplit.getSchema().getProperty("carbonSplit"), CarbondataLocalMultiBlockSplit.class);
            for (CarbondataLocalInputSplit split : currSplits.getSplitList()) {
                CarbonInputSplit carbonInputSplit = CarbondataLocalInputSplit.convertSplit(split);
                String taskId = carbonInputSplit.taskId;
                String segmentNo = carbonInputSplit.getSegmentId();
                for (LoadMetadataDetails load : segmentsToBeMerged) {
                    if (load.getLoadName().equals(segmentNo)) {
                        List<CarbondataLocalInputSplit> currList = taskIdToSplitMapping.computeIfAbsent(taskId, k -> new ArrayList<>());
                        currList.add(split);
                    }
                }
            }
        }
        // Step 4: Create the ConnectorSplitSource with the splits divided and return
        long index = 0;
        for (Map.Entry<String, List<CarbondataLocalInputSplit>> splitEntry : taskIdToSplitMapping.entrySet()) {
            CarbondataLocalMultiBlockSplit currSplit = new CarbondataLocalMultiBlockSplit(splitEntry.getValue(), splitEntry.getValue().stream().flatMap(f -> Arrays.stream(getLocations(f))).distinct().toArray(String[]::new));
            index++;
            Properties properties = new Properties();
            for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                properties.setProperty(entry.getKey(), entry.getValue());
            }
            // TODO: Use the existing CarbondataLocalInputSplit list to convert
            properties.setProperty("tablePath", tablePath);
            properties.setProperty("carbonSplit", currSplit.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            properties.setProperty("mergeLoadName", mergedLoadName);
            properties.setProperty("compactionType", compactionType.toString());
            properties.setProperty("taskNo", splitEntry.getKey());
            cSplits.add(HiveSplitWrapper.wrap(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(), schemaTableName.getTableName(), tablePath, 0L, 0L, 0L, 0L, properties, new ArrayList<>(), getHostAddresses(currSplit.getLocations()), OptionalInt.empty(), false, new HashMap<>(), Optional.empty(), false, Optional.empty(), Optional.empty(), false, ImmutableMap.of())));
        }
    }
    LOGGER.info("Splits for compaction built and ready");
    return new FixedSplitSource(cSplits.build());
}
Also used : CarbondataLocalInputSplit(io.hetu.core.plugin.carbondata.impl.CarbondataLocalInputSplit) HiveSplitWrapper(io.prestosql.plugin.hive.HiveSplitWrapper) HivePartitionManager(io.prestosql.plugin.hive.HivePartitionManager) VersionEmbedder(io.prestosql.spi.VersionEmbedder) Arrays(java.util.Arrays) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) QueryType(io.prestosql.spi.resourcegroups.QueryType) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) ForHive(io.prestosql.plugin.hive.ForHive) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) Logger(org.apache.log4j.Logger) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Gson(com.google.gson.Gson) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HiveCarbonUtil(org.apache.carbondata.hive.util.HiveCarbonUtil) HiveTransactionHandle(io.prestosql.plugin.hive.HiveTransactionHandle) Path(org.apache.hadoop.fs.Path) Expression(org.apache.carbondata.core.scan.expression.Expression) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) NamenodeStats(io.prestosql.plugin.hive.NamenodeStats) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) List(java.util.List) Table(io.prestosql.plugin.hive.metastore.Table) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) CoercionPolicy(io.prestosql.plugin.hive.CoercionPolicy) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) CarbonDataMergerUtil(org.apache.carbondata.processing.merger.CarbonDataMergerUtil) HashMap(java.util.HashMap) CompactionType(org.apache.carbondata.processing.merger.CompactionType) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) Supplier(java.util.function.Supplier) CarbondataTableReader(io.hetu.core.plugin.carbondata.impl.CarbondataTableReader) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ThreadLocalSessionInfo(org.apache.carbondata.core.util.ThreadLocalSessionInfo) ImmutableList(com.google.common.collect.ImmutableList) CarbonTimeStatisticsFactory(org.apache.carbondata.core.util.CarbonTimeStatisticsFactory) Objects.requireNonNull(java.util.Objects.requireNonNull) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) DirectoryLister(io.prestosql.plugin.hive.DirectoryLister) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) ExecutorService(java.util.concurrent.ExecutorService) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Properties(java.util.Properties) HostAddress(io.prestosql.spi.HostAddress) CarbondataLocalMultiBlockSplit(io.hetu.core.plugin.carbondata.impl.CarbondataLocalMultiBlockSplit) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) QueryStatisticsConstants(org.apache.carbondata.core.stats.QueryStatisticsConstants) CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel) HiveSplit(io.prestosql.plugin.hive.HiveSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) CarbondataTableConfig(io.hetu.core.plugin.carbondata.impl.CarbondataTableConfig) HiveSplitManager(io.prestosql.plugin.hive.HiveSplitManager)
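
The method above also relies on two small helpers that the snippet does not show, getLocations and getHostAddresses. Below is a minimal sketch of plausible shapes for them, assuming CarbondataLocalInputSplit exposes its preferred hosts as a List<String> (an assumption, not confirmed by the snippet):

// Hypothetical helper shapes; the real implementations in CarbondataSplitManager may differ.
private static String[] getLocations(CarbondataLocalInputSplit split) {
    // Assumption: the split carries its preferred host names as a List<String>
    return split.getLocations().toArray(new String[0]);
}

private static List<HostAddress> getHostAddresses(String[] hosts) {
    // Map bare host names onto the HostAddress instances the HiveSplit constructor expects
    return Arrays.stream(hosts)
            .map(HostAddress::fromString)
            .collect(toImmutableList());
}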

Example 37 with ConnectorSplitSource

use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.

the class AbstractTestHiveFileSystem method testGetRecords.

@Test
public void testGetRecords() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, this.table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);
        List<ConnectorSplit> splits = getAllSplits(splitSource);
        assertEquals(splits.size(), 1);
        long sum = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                for (MaterializedRow row : result) {
                    sum += (Long) row.getField(columnIndex.get("t_bigint"));
                }
            }
        }
        // The test table is made up of multiple S3 objects holding the same data in different
        // compression formats: uncompressed | .gz | .lz4 | .bz2
        assertEquals(sum, 78300 * 4);
    }
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) HiveTransaction(io.prestosql.plugin.hive.AbstractTestHive.HiveTransaction) Transaction(io.prestosql.plugin.hive.AbstractTestHive.Transaction) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) MaterializedResult(io.prestosql.testing.MaterializedResult) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) MaterializedRow(io.prestosql.testing.MaterializedRow) Test(org.testng.annotations.Test)
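
getAllSplits here is a test-side helper that drains the split source through the standard ConnectorSplitSource batch API. A minimal sketch of what such a helper can look like, assuming the NOT_PARTITIONED handle from io.prestosql.spi.connector.NotPartitionedPartitionHandle and an arbitrary batch size (the real helper may differ):

// Sketch of a getAllSplits-style helper; the batch size of 1000 is illustrative.
private static List<ConnectorSplit> getAllSplits(ConnectorSplitSource splitSource) throws Exception {
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        // getNextBatch returns a CompletableFuture<ConnectorSplitBatch>; blocking in a test is fine
        splits.addAll(splitSource.getNextBatch(NOT_PARTITIONED, 1000).get().getSplits());
    }
    return splits.build();
}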

Example 38 with ConnectorSplitSource

use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.

the class AbstractTestHive method testGetPartitionSplitsBatch.

@Test
public void testGetPartitionSplitsBatch() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);
        assertEquals(getSplitCount(splitSource), tablePartitionFormatPartitions.size());
    }
}
Also used : ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)
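
getSplitCount is presumably another small helper over the same batch API. Building on the getAllSplits sketch above, it reduces to counting the drained splits (a sketch, not the test's actual implementation):

// Sketch only; the real helper may count batch by batch instead of materializing the list.
private static int getSplitCount(ConnectorSplitSource splitSource) {
    try {
        return getAllSplits(splitSource).size();
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
}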

Example 39 with ConnectorSplitSource

use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.

the class AbstractTestHive method testGetPartitionSplitsBatchUnpartitioned.

@Test
public void testGetPartitionSplitsBatchUnpartitioned() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);
        assertEquals(getSplitCount(splitSource), 1);
    }
}
Also used : ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 40 with ConnectorSplitSource

use of io.prestosql.spi.connector.ConnectorSplitSource in project boostkit-bigdata by kunpengcompute.

the class AbstractTestHive method testPartitionSchemaNonCanonical.

// TODO coercion of non-canonical values should be supported
@Test(enabled = false)
public void testPartitionSchemaNonCanonical() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle table = getTableHandle(metadata, tablePartitionSchemaChangeNonCanonical);
        ColumnHandle column = metadata.getColumnHandles(session, table).get("t_boolean");
        Constraint constraint = new Constraint(TupleDomain.fromFixedValues(ImmutableMap.of(column, NullableValue.of(BOOLEAN, false))));
        table = applyFilter(metadata, table, constraint);
        HivePartition partition = getOnlyElement(((HiveTableHandle) table).getPartitions().orElseThrow(AssertionError::new));
        assertEquals(getPartitionId(partition), "t_boolean=0");
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, table, UNGROUPED_SCHEDULING);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        ImmutableList<ColumnHandle> columnHandles = ImmutableList.of(column);
        try (ConnectorPageSource ignored = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, table, columnHandles)) {
            fail("expected exception");
        } catch (PrestoException e) {
            assertEquals(e.getErrorCode(), HiveErrorCode.HIVE_INVALID_PARTITION_VALUE.toErrorCode());
        }
    }
}
Also used : HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Test(org.testng.annotations.Test)
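
applyFilter here is a test-side wrapper around ConnectorMetadata.applyFilter that fails the test if the connector rejects the pushed-down constraint. A plausible sketch, assuming the SPI overload that takes a ConnectorSession and that the helper can obtain one via the test's newSession() (both assumptions; the actual AbstractTestHive helper may differ):

// Hypothetical wrapper shape, not confirmed against the real test class.
private ConnectorTableHandle applyFilter(ConnectorMetadata metadata, ConnectorTableHandle table, Constraint constraint) {
    return metadata.applyFilter(newSession(), table, constraint)
            .map(ConstraintApplicationResult::getHandle)
            .orElseThrow(AssertionError::new);
}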

Aggregations

ConnectorSplitSource (io.prestosql.spi.connector.ConnectorSplitSource) 45
Test (org.testng.annotations.Test) 36
ConnectorSession (io.prestosql.spi.connector.ConnectorSession) 27
SchemaTableName (io.prestosql.spi.connector.SchemaTableName) 19
ColumnHandle (io.prestosql.spi.connector.ColumnHandle) 18
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit) 17
ConnectorTransactionHandle (io.prestosql.spi.connector.ConnectorTransactionHandle) 17
BaseJdbcConfig (io.prestosql.plugin.jdbc.BaseJdbcConfig) 14
ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle) 14
TupleDomain (io.prestosql.spi.predicate.TupleDomain) 13
TopicName (org.apache.pulsar.common.naming.TopicName) 12
ConnectorMetadata (io.prestosql.spi.connector.ConnectorMetadata) 10
TestingConnectorSession (io.prestosql.testing.TestingConnectorSession) 10
HashMap (java.util.HashMap) 10
Collection (java.util.Collection) 9
Domain (io.prestosql.spi.predicate.Domain) 7
JdbcSplit (io.prestosql.plugin.jdbc.JdbcSplit) 6
ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource) 6
PrestoException (io.prestosql.spi.PrestoException) 5
ColumnMetadata (io.prestosql.spi.connector.ColumnMetadata) 5