Search in sources :

Example 1 with HiveSplitWrapper

Use of io.prestosql.plugin.hive.HiveSplitWrapper in project hetu-core by openlookeng.

The class CarbondataPageSourceProvider defines the method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns) {
    // Carbondata splits arrive wrapped as HiveSplitWrapper; the first inner split carries the
    // carbondata-specific schema properties ("queryId", "tablePath", "carbonSplit", ...).
    HiveSplit carbonSplit = Types.checkType(((HiveSplitWrapper) split).getSplits().get(0), HiveSplit.class, "split is not class HiveSplit");
    this.queryId = carbonSplit.getSchema().getProperty("queryId");
    if (this.queryId == null) {
        // No queryId property means this is a plain hive split: fall back to the hive page source.
        return super.createPageSource(transactionHandle, session, split, table, columns);
    }
    try {
        // Probe the file system up front so a misconfigured HDFS fails fast with a clear error.
        hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
    } catch (IOException e) {
        // Preserve the cause so the underlying HDFS failure keeps its stack trace.
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Failed to get file system: " + e.getMessage(), e);
    }
    // Run as the session user so HDFS access checks apply to the querying user, not the server.
    return hdfsEnvironment.doAs(session.getUser(), () -> {
        Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase(), carbonSplit.getTable()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
        CarbonTable carbonTable = getCarbonTable(carbonSplit, configuration);
        /* So that CarbonTLS can access it */
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
        boolean isFullACID = isFullAcidTable(Maps.fromProperties(carbonSplit.getSchema()));
        // Direct vector fill is enabled unless row-level filter pushdown is configured, or when the
        // implicit tupleId column is projected (which requires the row-by-row path).
        boolean isDirectVectorFill = (carbonTableReader.config.getPushRowFilter() == null) || carbonTableReader.config.getPushRowFilter().equalsIgnoreCase("false") || columns.stream().anyMatch(c -> c.getColumnName().equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID));
        return new CarbondataPageSource(carbonTable, queryId, carbonSplit, columns, table, configuration, isDirectVectorFill, isFullACID, session.getUser(), hdfsEnvironment);
    });
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) HiveSplitWrapper(io.prestosql.plugin.hive.HiveSplitWrapper) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Inject(com.google.inject.Inject) CarbondataTableReader(io.hetu.core.plugin.carbondata.impl.CarbondataTableReader) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ThreadLocalSessionInfo(org.apache.carbondata.core.util.ThreadLocalSessionInfo) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Configuration(org.apache.hadoop.conf.Configuration) Objects.requireNonNull(java.util.Objects.requireNonNull) DynamicFilterSupplier(io.prestosql.spi.dynamicfilter.DynamicFilterSupplier) Path(org.apache.hadoop.fs.Path) HivePageSourceFactory(io.prestosql.plugin.hive.HivePageSourceFactory) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) PrestoException(io.prestosql.spi.PrestoException) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) HivePageSourceProvider(io.prestosql.plugin.hive.HivePageSourceProvider) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TypeManager(io.prestosql.spi.type.TypeManager) Set(java.util.Set) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel) HiveSplit(io.prestosql.plugin.hive.HiveSplit) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) 
HiveRecordCursorProvider(io.prestosql.plugin.hive.HiveRecordCursorProvider) HiveSplit(io.prestosql.plugin.hive.HiveSplit) Configuration(org.apache.hadoop.conf.Configuration) HiveSplitWrapper(io.prestosql.plugin.hive.HiveSplitWrapper) PrestoException(io.prestosql.spi.PrestoException) IOException(java.io.IOException) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment)

Example 2 with HiveSplitWrapper

Use of io.prestosql.plugin.hive.HiveSplitWrapper in project hetu-core by openlookeng.

The class CarbondataSplitManager defines the method getSplitsForCompaction.

/**
 * Groups the given splits into per-task batches for the segments selected for compaction and
 * wraps them in a {@link ConnectorSplitSource}.
 *
 * @param identity hive identity used for metastore access
 * @param transactionHandle current hive transaction
 * @param tableHandle table being compacted
 * @param tablePath filesystem path of the carbondata table
 * @param queryProperties compaction options; key "FULL" selects MAJOR vs MINOR compaction
 * @param queryId id propagated to each produced split's schema
 * @param allSplitsForComp all candidate splits for compaction
 * @param configuration hadoop configuration for the table
 * @throws PrestoException if the carbon load model cannot be created
 */
public ConnectorSplitSource getSplitsForCompaction(HiveIdentity identity, ConnectorTransactionHandle transactionHandle, ConnectorTableHandle tableHandle, String tablePath, Map<String, Object> queryProperties, String queryId, ImmutableList.Builder<ConnectorSplit> allSplitsForComp, Configuration configuration) throws PrestoException {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTable.getSchemaTableName();
    List<List<LoadMetadataDetails>> allGroupedSegList;
    // Step 1: Get table handles and metadata
    SemiTransactionalHiveMetastore metaStore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metaStore.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    Properties hiveSchema = MetastoreUtil.getHiveSchema(table);
    CarbonLoadModel carbonLoadModel = null;
    try {
        carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, configuration);
    } catch (Exception e) {
        // Keep the cause in both the log and the rethrown exception so the root failure is diagnosable.
        LOGGER.error("Cannot create carbon load model", e);
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Cannot create carbon load model", e);
    }
    // Value comparison (not ==): the map holds Objects, and reference equality against the cached
    // Boolean.TRUE silently misses equal-but-distinct Boolean instances.
    CompactionType compactionType = Boolean.TRUE.equals(queryProperties.get("FULL")) ? CompactionType.MAJOR : CompactionType.MINOR;
    // Step 2: Get segments to be merged based on configuration passed
    allGroupedSegList = CarbondataHetuCompactorUtil.identifyAndGroupSegmentsToBeMerged(carbonLoadModel, configuration, compactionType, carbondataConfig.getMajorVacuumSegSize(), carbondataConfig.getMinorVacuumSegCount());
    // All the splits are grouped based on taskIds and compaction level into one builder
    ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
    Gson gson = new Gson();
    for (List<LoadMetadataDetails> segmentsToBeMerged : allGroupedSegList) {
        String mergedLoadName = CarbonDataMergerUtil.getMergedLoadName(segmentsToBeMerged);
        // Step 3: Get all the splits for the required segments and divide them based on task ids
        Map<String, List<CarbondataLocalInputSplit>> taskIdToSplitMapping = new HashMap<>();
        for (ConnectorSplit connectorSplit : allSplitsForComp.build()) {
            HiveSplit currSplit = ((HiveSplitWrapper) connectorSplit).getSplits().get(0);
            CarbondataLocalMultiBlockSplit currSplits = gson.fromJson(currSplit.getSchema().getProperty("carbonSplit"), CarbondataLocalMultiBlockSplit.class);
            for (CarbondataLocalInputSplit split : currSplits.getSplitList()) {
                CarbonInputSplit carbonInputSplit = CarbondataLocalInputSplit.convertSplit(split);
                String taskId = carbonInputSplit.taskId;
                String segmentNo = carbonInputSplit.getSegmentId();
                // Only keep splits whose segment is part of the current merge group.
                for (LoadMetadataDetails load : segmentsToBeMerged) {
                    if (load.getLoadName().equals(segmentNo)) {
                        List<CarbondataLocalInputSplit> currList = taskIdToSplitMapping.computeIfAbsent(taskId, k -> new ArrayList<>());
                        currList.add(split);
                    }
                }
            }
        }
        // Step 4: Create the ConnectorSplitSource with the splits divided and return
        long index = 0;
        for (Map.Entry<String, List<CarbondataLocalInputSplit>> splitEntry : taskIdToSplitMapping.entrySet()) {
            CarbondataLocalMultiBlockSplit currSplit = new CarbondataLocalMultiBlockSplit(splitEntry.getValue(), splitEntry.getValue().stream().flatMap(f -> Arrays.stream(getLocations(f))).distinct().toArray(String[]::new));
            index++;
            Properties properties = new Properties();
            for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                properties.setProperty(entry.getKey(), entry.getValue());
            }
            // TODO: Use the existing CarbondataLocalInputSplit list to convert
            properties.setProperty("tablePath", tablePath);
            properties.setProperty("carbonSplit", currSplit.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            properties.setProperty("mergeLoadName", mergedLoadName);
            properties.setProperty("compactionType", compactionType.toString());
            properties.setProperty("taskNo", splitEntry.getKey());
            cSplits.add(HiveSplitWrapper.wrap(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(), schemaTableName.getTableName(), tablePath, 0L, 0L, 0L, 0L, properties, new ArrayList<>(), getHostAddresses(currSplit.getLocations()), OptionalInt.empty(), false, new HashMap<>(), Optional.empty(), false, Optional.empty(), Optional.empty(), false, ImmutableMap.of())));
        }
    }
    LOGGER.info("Splits for compaction built and ready");
    return new FixedSplitSource(cSplits.build());
}
Also used : CarbondataLocalInputSplit(io.hetu.core.plugin.carbondata.impl.CarbondataLocalInputSplit) HiveSplitWrapper(io.prestosql.plugin.hive.HiveSplitWrapper) HivePartitionManager(io.prestosql.plugin.hive.HivePartitionManager) VersionEmbedder(io.prestosql.spi.VersionEmbedder) Arrays(java.util.Arrays) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) QueryType(io.prestosql.spi.resourcegroups.QueryType) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) ForHive(io.prestosql.plugin.hive.ForHive) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) Logger(org.apache.log4j.Logger) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Gson(com.google.gson.Gson) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HiveCarbonUtil(org.apache.carbondata.hive.util.HiveCarbonUtil) HiveTransactionHandle(io.prestosql.plugin.hive.HiveTransactionHandle) Path(org.apache.hadoop.fs.Path) Expression(org.apache.carbondata.core.scan.expression.Expression) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) NamenodeStats(io.prestosql.plugin.hive.NamenodeStats) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) List(java.util.List) Table(io.prestosql.plugin.hive.metastore.Table) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) CoercionPolicy(io.prestosql.plugin.hive.CoercionPolicy) 
ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) CarbondataLocalInputSplit(io.hetu.core.plugin.carbondata.impl.CarbondataLocalInputSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) CarbonDataMergerUtil(org.apache.carbondata.processing.merger.CarbonDataMergerUtil) HashMap(java.util.HashMap) CompactionType(org.apache.carbondata.processing.merger.CompactionType) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) Supplier(java.util.function.Supplier) CarbondataTableReader(io.hetu.core.plugin.carbondata.impl.CarbondataTableReader) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ThreadLocalSessionInfo(org.apache.carbondata.core.util.ThreadLocalSessionInfo) ImmutableList(com.google.common.collect.ImmutableList) CarbonTimeStatisticsFactory(org.apache.carbondata.core.util.CarbonTimeStatisticsFactory) Objects.requireNonNull(java.util.Objects.requireNonNull) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) DirectoryLister(io.prestosql.plugin.hive.DirectoryLister) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) ExecutorService(java.util.concurrent.ExecutorService) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Properties(java.util.Properties) HostAddress(io.prestosql.spi.HostAddress) CarbondataLocalMultiBlockSplit(io.hetu.core.plugin.carbondata.impl.CarbondataLocalMultiBlockSplit) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) 
TupleDomain(io.prestosql.spi.predicate.TupleDomain) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) QueryStatisticsConstants(org.apache.carbondata.core.stats.QueryStatisticsConstants) CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel) HiveSplit(io.prestosql.plugin.hive.HiveSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) CarbondataTableConfig(io.hetu.core.plugin.carbondata.impl.CarbondataTableConfig) HiveSplitManager(io.prestosql.plugin.hive.HiveSplitManager) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) PrestoException(io.prestosql.spi.PrestoException) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) Properties(java.util.Properties) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) CompactionType(org.apache.carbondata.processing.merger.CompactionType) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) CarbondataLocalMultiBlockSplit(io.hetu.core.plugin.carbondata.impl.CarbondataLocalMultiBlockSplit) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Table(io.prestosql.plugin.hive.metastore.Table) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) PrestoException(io.prestosql.spi.PrestoException) 
IOException(java.io.IOException) HiveSplit(io.prestosql.plugin.hive.HiveSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Aggregations

CarbondataTableCacheModel (io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel)2 CarbondataTableReader (io.hetu.core.plugin.carbondata.impl.CarbondataTableReader)2 HdfsEnvironment (io.prestosql.plugin.hive.HdfsEnvironment)2 HiveConfig (io.prestosql.plugin.hive.HiveConfig)2 HiveSplit (io.prestosql.plugin.hive.HiveSplit)2 HiveSplitWrapper (io.prestosql.plugin.hive.HiveSplitWrapper)2 PrestoException (io.prestosql.spi.PrestoException)2 GENERIC_INTERNAL_ERROR (io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR)2 ConnectorSession (io.prestosql.spi.connector.ConnectorSession)2 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)2 ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle)2 ConnectorTransactionHandle (io.prestosql.spi.connector.ConnectorTransactionHandle)2 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)2 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Maps (com.google.common.collect.Maps)1 Gson (com.google.gson.Gson)1 Inject (com.google.inject.Inject)1 CarbondataLocalInputSplit (io.hetu.core.plugin.carbondata.impl.CarbondataLocalInputSplit)1