Search in sources :

Example 1 with HiveSplit

Use of com.facebook.presto.hive.HiveSplit in the Apache CarbonData project.

In the class CarbondataSplitManager, method getSplits.

/**
 * Computes the splits for a table scan.
 *
 * <p>Tables whose input format name does not contain {@code "carbon"} are handed to the parent
 * implementation unchanged. For carbon tables the method resolves the table location, builds a
 * Hadoop configuration that also carries the table's serde parameters, asks
 * {@code carbonTableReader} for the input splits and wraps each one in a {@link HiveSplit} whose
 * schema properties carry the serialized carbon split ({@code "carbonSplit"}), the table path
 * ({@code "tablePath"}), the generated query id ({@code "queryId"}) and a 1-based split index
 * ({@code "index"}).
 *
 * @param transactionHandle transaction handle; cast to {@code HiveTransactionHandle} to obtain the metastore
 * @param session current session; supplies the Presto query id and the HDFS context
 * @param layoutHandle table layout; cast to {@code HiveTableLayoutHandle}
 * @param splitSchedulingStrategy only forwarded to the parent implementation for non-carbon tables
 * @return a {@code FixedSplitSource} over the generated splits, or the parent's result for non-carbon tables
 * @throws TableNotFoundException if the metastore does not know the table
 * @throws RuntimeException wrapping any exception raised while the carbon splits are loaded or wrapped
 */
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layoutHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    HiveTableLayoutHandle layout = (HiveTableLayoutHandle) layoutHandle;
    SchemaTableName schemaTableName = layout.getSchemaTableName();
    carbonTableReader.setPrestoQueryId(session.getQueryId());
    // get table metadata
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    if (!table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        return super.getSplits(transactionHandle, session, layoutHandle, splitSchedulingStrategy);
    }
    // Fetched once: the same parameters feed the location lookup, the Hadoop configuration
    // and the schema properties of every split.
    Map<String, String> serdeParameters = table.getStorage().getSerdeParameters();
    // for hive metastore, get table location from catalog table's tablePath
    String location = serdeParameters.get("tablePath");
    if (StringUtils.isEmpty(location)) {
        // file metastore case tablePath can be null, so get from location
        location = table.getStorage().getLocation();
    }
    // Stays empty unless the layout has partition columns and carries a partition list.
    List<PartitionSpec> filteredPartitions = new ArrayList<>();
    if (!layout.getPartitionColumns().isEmpty() && layout.getPartitions().isPresent()) {
        List<String> colNames = layout.getPartitionColumns().stream().map(x -> ((HiveColumnHandle) x).getName()).collect(Collectors.toList());
        for (HivePartition partition : layout.getPartitions().get()) {
            filteredPartitions.add(new PartitionSpec(colNames, location + CarbonCommonConstants.FILE_SEPARATOR + partition.getPartitionId()));
        }
    }
    // This id is generated locally; it is distinct from the Presto query id set on the reader above.
    String queryId = String.valueOf(System.nanoTime());
    QueryStatistic statistic = new QueryStatistic();
    QueryStatisticsRecorder statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    statistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
    statisticRecorder.recordStatisticsForDriver(statistic, queryId);
    // Fresh statistic object for the BLOCK_IDENTIFICATION entry recorded after the splits are built.
    statistic = new QueryStatistic();
    carbonTableReader.setQueryId(queryId);
    TupleDomain<HiveColumnHandle> predicate = (TupleDomain<HiveColumnHandle>) layout.getCompactEffectivePredicate();
    Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, schemaTableName.getSchemaName(), schemaTableName.getTableName()), new Path(location));
    configuration = carbonTableReader.updateS3Properties(configuration);
    for (Map.Entry<String, String> entry : serdeParameters.entrySet()) {
        configuration.set(entry.getKey(), entry.getValue());
    }
    // set the hadoop configuration to thread local, so that FileFactory can use it.
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
    CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(schemaTableName, location, configuration);
    Expression filters = PrestoFilterUtil.parseFilterExpression(predicate);
    try {
        List<CarbonLocalMultiBlockSplit> splits = carbonTableReader.getInputSplits(cache, filters, filteredPartitions, configuration);
        ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
        long index = 0;
        for (CarbonLocalMultiBlockSplit split : splits) {
            // Incremented before use, so the "index" property is 1-based.
            index++;
            // Each split gets its own Properties instance. The serde parameters are copied in
            // as real entries (not as Properties defaults) so that entrySet() exposes them.
            Properties properties = new Properties();
            properties.putAll(serdeParameters);
            properties.setProperty("tablePath", cache.getCarbonTable().getTablePath());
            properties.setProperty("carbonSplit", split.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            cSplits.add(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(), schemaTableName.getTableName(), "", 0, 0, 0, properties, new ArrayList<>(), getHostAddresses(split.getLocations()), OptionalInt.empty(), false, predicate, new HashMap<>(), Optional.empty(), false));
        }
        // NOTE(review): the driver statistics are logged both before and after the
        // BLOCK_IDENTIFICATION entry is recorded; kept as-is, confirm the first call is intended.
        statisticRecorder.logStatisticsAsTableDriver();
        statistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
        statisticRecorder.recordStatisticsForDriver(statistic, queryId);
        statisticRecorder.logStatisticsAsTableDriver();
        return new FixedSplitSource(cSplits.build());
    } catch (Exception ex) {
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Arrays(java.util.Arrays) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) CarbonLocalMultiBlockSplit(org.apache.carbondata.presto.impl.CarbonLocalMultiBlockSplit) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) StringUtils(org.apache.commons.lang3.StringUtils) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) CarbonTableReader(org.apache.carbondata.presto.impl.CarbonTableReader) HiveTransactionHandle(com.facebook.presto.hive.HiveTransactionHandle) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) ForHiveClient(com.facebook.presto.hive.ForHiveClient) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) Expression(org.apache.carbondata.core.scan.expression.Expression) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HostAddress(com.facebook.presto.spi.HostAddress) DirectoryLister(com.facebook.presto.hive.DirectoryLister) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Collectors(java.util.stream.Collectors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) List(java.util.List) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) Table(com.facebook.presto.hive.metastore.Table) HiveSplitManager(com.facebook.presto.hive.HiveSplitManager) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) 
HashMap(java.util.HashMap) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) ThreadLocalSessionInfo(org.apache.carbondata.core.util.ThreadLocalSessionInfo) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) CarbonTimeStatisticsFactory(org.apache.carbondata.core.util.CarbonTimeStatisticsFactory) NamenodeStats(com.facebook.presto.hive.NamenodeStats) ExecutorService(java.util.concurrent.ExecutorService) Properties(java.util.Properties) CoercionPolicy(com.facebook.presto.hive.CoercionPolicy) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) QueryStatisticsConstants(org.apache.carbondata.core.stats.QueryStatisticsConstants) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HivePartition(com.facebook.presto.hive.HivePartition) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnHandle(com.facebook.presto.spi.ColumnHandle) HiveSplit(com.facebook.presto.hive.HiveSplit) Configuration(org.apache.hadoop.conf.Configuration) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Properties(java.util.Properties) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) Path(org.apache.hadoop.fs.Path) Table(com.facebook.presto.hive.metastore.Table) 
SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) HiveSplit(com.facebook.presto.hive.HiveSplit) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Expression(org.apache.carbondata.core.scan.expression.Expression) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) CarbonLocalMultiBlockSplit(org.apache.carbondata.presto.impl.CarbonLocalMultiBlockSplit) Map(java.util.Map) HashMap(java.util.HashMap) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HivePartition(com.facebook.presto.hive.HivePartition)

Example 2 with HiveSplit

Use of com.facebook.presto.hive.HiveSplit in the Apache CarbonData project.

In the class CarbondataPageSourceProvider, method createPageSource.

/**
 * Creates the page source for one split.
 *
 * <p>A split whose schema properties contain no {@code "queryId"} entry is delegated to the
 * parent implementation. Otherwise a Hadoop configuration is built for the split's
 * {@code "tablePath"}, every schema property of the split is copied into it, and a
 * {@code CarbondataPageSource} is returned.
 *
 * @param transactionHandle forwarded to the parent implementation on fallback
 * @param session current session; used for the HDFS context
 * @param split the split to read; must be a {@link HiveSplit}
 * @param columns the columns to read
 * @return a {@code CarbondataPageSource}, or the parent's page source when the split has no query id
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List<ColumnHandle> columns) {
    HiveSplit carbonSplit = checkType(split, HiveSplit.class, "split is not class HiveSplit");
    // Keep this split's query id in a local. The instance field is still assigned as before,
    // but it is no longer re-read below, so a concurrent call that overwrites the field
    // cannot leak another split's query id into this page source.
    // NOTE(review): assumes this provider instance may be invoked from several threads - confirm.
    String splitQueryId = carbonSplit.getSchema().getProperty("queryId");
    this.queryId = splitQueryId;
    if (splitQueryId == null) {
        // Fall back to hive pagesource.
        return super.createPageSource(transactionHandle, session, split, columns);
    }
    Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase(), carbonSplit.getTable()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
    configuration = carbonTableReader.updateS3Properties(configuration);
    // Copy every schema property of the split into the Hadoop configuration.
    for (Map.Entry<Object, Object> entry : carbonSplit.getSchema().entrySet()) {
        configuration.set(entry.getKey().toString(), entry.getValue().toString());
    }
    CarbonTable carbonTable = getCarbonTable(carbonSplit, configuration);
    // Direct vector fill is enabled when the push-row-filter setting is absent or "false"
    // (case-insensitive).
    String pushRowFilter = carbonTableReader.config.getPushRowFilter();
    boolean isDirectVectorFill = pushRowFilter == null || pushRowFilter.equalsIgnoreCase("false");
    return new CarbondataPageSource(carbonTable, splitQueryId, carbonSplit, columns, configuration, isDirectVectorFill);
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) HiveSplit(com.facebook.presto.hive.HiveSplit) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment)

Aggregations

HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)2 HiveSplit (com.facebook.presto.hive.HiveSplit)2 Map (java.util.Map)2 Configuration (org.apache.hadoop.conf.Configuration)2 Path (org.apache.hadoop.fs.Path)2 CoercionPolicy (com.facebook.presto.hive.CoercionPolicy)1 DirectoryLister (com.facebook.presto.hive.DirectoryLister)1 ForHiveClient (com.facebook.presto.hive.ForHiveClient)1 HiveClientConfig (com.facebook.presto.hive.HiveClientConfig)1 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)1 HivePartition (com.facebook.presto.hive.HivePartition)1 HiveSplitManager (com.facebook.presto.hive.HiveSplitManager)1 HiveTableLayoutHandle (com.facebook.presto.hive.HiveTableLayoutHandle)1 HiveTransactionHandle (com.facebook.presto.hive.HiveTransactionHandle)1 NamenodeStats (com.facebook.presto.hive.NamenodeStats)1 SemiTransactionalHiveMetastore (com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore)1 Table (com.facebook.presto.hive.metastore.Table)1 ColumnHandle (com.facebook.presto.spi.ColumnHandle)1 ConnectorSession (com.facebook.presto.spi.ConnectorSession)1 ConnectorSplit (com.facebook.presto.spi.ConnectorSplit)1