Example 1 with CarbondataTableCacheModel

Use of io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel in project hetu-core by openlookeng.

From the class CarbondataPageSourceProvider, the method getCarbonTable:

/**
 * Resolves the cached CarbonTable for the given split.
 *
 * @param carbonSplit   the Hive split carrying the CarbonData schema properties
 * @param configuration the Hadoop configuration used to resolve the table path
 * @return the cached CarbonTable for the split's database and table
 */
private CarbonTable getCarbonTable(HiveSplit carbonSplit, Configuration configuration) {
    CarbondataTableCacheModel tableCacheModel = carbonTableReader.getCarbonCache(
            new SchemaTableName(carbonSplit.getDatabase(), carbonSplit.getTable()),
            carbonSplit.getSchema().getProperty("tablePath"), configuration);
    requireNonNull(tableCacheModel, "tableCacheModel should not be null");
    requireNonNull(tableCacheModel.getCarbonTable(), "tableCacheModel.carbonTable should not be null");
    requireNonNull(tableCacheModel.getCarbonTable().getTableInfo(), "tableCacheModel.carbonTable.tableInfo should not be null");
    return tableCacheModel.getCarbonTable();
}
Also used: CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel), SchemaTableName(io.prestosql.spi.connector.SchemaTableName)
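
The three requireNonNull checks guard against a cold or evicted cache entry. As a rough illustration of the lookup-or-load pattern behind getCarbonCache, here is a minimal, hypothetical sketch; the nested types are simplified stand-ins, not the real CarbondataTableReader implementation, which also reads table metadata from HDFS:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical sketch of a table-metadata cache keyed by schema-qualified name.
// SchemaTableName and TableCacheModel below are simplified stand-ins for the
// real io.prestosql.spi.connector.SchemaTableName and CarbondataTableCacheModel.
public class TableCacheSketch {
    record SchemaTableName(String schema, String table) {}
    record TableCacheModel(String tablePath) {}

    private final Map<SchemaTableName, TableCacheModel> cache = new ConcurrentHashMap<>();

    TableCacheModel getCarbonCache(SchemaTableName name, String tablePath) {
        // Load the table metadata once per schema.table and reuse it for
        // every subsequent split of the same table.
        return cache.computeIfAbsent(name, n -> loadTableFromStore(n, tablePath));
    }

    private TableCacheModel loadTableFromStore(SchemaTableName name, String tablePath) {
        // The real reader deserializes CarbonData table info from the table
        // path on HDFS; this sketch just records the path.
        return new TableCacheModel(tablePath);
    }
}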

Example 2 with CarbondataTableCacheModel

Use of io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel in project hetu-core by openlookeng.

From the class CarbondataSplitManager, the method getSplits:

@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transactionHandle,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        Supplier<List<Set<DynamicFilter>>> dynamicFilterSupplier,
        Optional<QueryType> queryType,
        Map<String, Object> queryProperties,
        Set<TupleDomain<ColumnMetadata>> userDefinedCachePredicates,
        boolean partOfReuse) {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTable.getSchemaTableName();
    // get table metadata
    HiveIdentity identity = new HiveIdentity(session);
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metastore.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    if (!table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        throw new PrestoException(NOT_SUPPORTED, "Carbondata connector can only read carbondata tables");
    }
    return hdfsEnvironment.doAs(session.getUser(), () -> {
        String location = table.getStorage().getLocation();
        // Nanosecond timestamp doubles as a unique query id for statistics recording.
        String queryId = String.valueOf(System.nanoTime());
        QueryStatistic statistic = new QueryStatistic();
        QueryStatisticsRecorder statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
        statistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
        statisticRecorder.recordStatisticsForDriver(statistic, queryId);
        statistic = new QueryStatistic();
        carbonTableReader.setQueryId(queryId);
        TupleDomain<HiveColumnHandle> predicate = (TupleDomain<HiveColumnHandle>) hiveTable.getCompactEffectivePredicate();
        Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, schemaTableName.getSchemaName(), schemaTableName.getTableName()), new Path(location));
        // set the hadoop configuration to thread local, so that FileFactory can use it.
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
        CarbondataTableCacheModel cache = carbonTableReader.getCarbonCache(schemaTableName, location, configuration);
        Expression filters = CarbondataHetuFilterUtil.parseFilterExpression(predicate);
        try {
            List<CarbondataLocalMultiBlockSplit> splits = carbonTableReader.getInputSplits(cache, filters, predicate, configuration);
            ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
            long index = 0;
            for (CarbondataLocalMultiBlockSplit split : splits) {
                index++;
                Properties properties = new Properties();
                for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                    properties.setProperty(entry.getKey(), entry.getValue());
                }
                properties.setProperty("tablePath", cache.getCarbonTable().getTablePath());
                properties.setProperty("carbonSplit", split.getJsonString());
                properties.setProperty("queryId", queryId);
                properties.setProperty("index", String.valueOf(index));
                cSplits.add(HiveSplitWrapper.wrap(new HiveSplit(
                        schemaTableName.getSchemaName(), schemaTableName.getTableName(),
                        schemaTableName.getTableName(), cache.getCarbonTable().getTablePath(),
                        0, 0, 0, 0, properties, new ArrayList<>(),
                        getHostAddresses(split.getLocations()), OptionalInt.empty(), false,
                        new HashMap<>(), Optional.empty(), false, Optional.empty(),
                        Optional.empty(), false, ImmutableMap.of())));
                /* TODO: Align this part with the rest of the HiveSplit loading flow
                 * and figure out how to pass valid transaction IDs to CarbonData. */
            }
            statisticRecorder.logStatisticsAsTableDriver();
            statistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
            statisticRecorder.recordStatisticsForDriver(statistic, queryId);
            statisticRecorder.logStatisticsAsTableDriver();
            if (queryType != null && queryType.isPresent() && queryType.get().equals(QueryType.VACUUM)) {
                // Get Splits for compaction
                return getSplitsForCompaction(identity, transactionHandle, tableHandle, cache.getCarbonTable().getTablePath(), queryProperties, queryId, cSplits, configuration);
            }
            return new FixedSplitSource(cSplits.build());
        } catch (IOException ex) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Failed while trying to get splits", ex);
        }
    });
}
Also used: Configuration(org.apache.hadoop.conf.Configuration), SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore), ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList), ImmutableList(com.google.common.collect.ImmutableList), ArrayList(java.util.ArrayList), PrestoException(io.prestosql.spi.PrestoException), Properties(java.util.Properties), HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity), HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment), TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException), HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle), CarbondataLocalMultiBlockSplit(io.hetu.core.plugin.carbondata.impl.CarbondataLocalMultiBlockSplit), FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource), HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle), QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic), Path(org.apache.hadoop.fs.Path), CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel), Table(io.prestosql.plugin.hive.metastore.Table), IOException(java.io.IOException), SchemaTableName(io.prestosql.spi.connector.SchemaTableName), HiveSplit(io.prestosql.plugin.hive.HiveSplit), TupleDomain(io.prestosql.spi.predicate.TupleDomain), Expression(org.apache.carbondata.core.scan.expression.Expression), QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder), ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit), Map(java.util.Map), ImmutableMap(com.google.common.collect.ImmutableMap), HashMap(java.util.HashMap)
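
Note how getSplits smuggles CarbonData-specific metadata through the generic HiveSplit: the serialized split, table path, query id, and split index travel as entries in the split's schema Properties, and CarbondataPageSourceProvider reads them back on the other side (Example 1 reads "tablePath"). A minimal sketch of that round trip, with hypothetical example values:

import java.util.Properties;

// Sketch of the Properties handshake between CarbondataSplitManager (producer)
// and CarbondataPageSourceProvider (consumer). The keys match the ones set in
// getSplits above; the values here are made-up examples.
public class SplitPropertiesSketch {
    public static void main(String[] args) {
        // Producer side: attach CarbonData metadata to the generic HiveSplit
        // via its schema Properties.
        Properties schema = new Properties();
        schema.setProperty("tablePath", "/user/hive/warehouse/db.db/t");  // hypothetical path
        schema.setProperty("carbonSplit", "{\"blocks\":[]}");             // serialized split JSON
        schema.setProperty("queryId", String.valueOf(System.nanoTime()));
        schema.setProperty("index", "1");

        // Consumer side: mirror of carbonSplit.getSchema().getProperty("tablePath")
        // in Example 1, used to locate the cached CarbonTable.
        String tablePath = schema.getProperty("tablePath");
        System.out.println("resolved tablePath = " + tablePath);
    }
}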

Example 3 with CarbondataTableCacheModel

Use of io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel in project hetu-core by openlookeng.

From the class CarbondataMetadata, the static method getCarbonTable (the static counterpart of the helper in Example 1):

static CarbonTable getCarbonTable(String dbName, String tableName, Properties schema,
        Configuration configuration, CarbondataTableReader carbondataTableReader) {
    CarbondataTableCacheModel tableCacheModel = carbondataTableReader.getCarbonCache(
            new SchemaTableName(dbName, tableName), schema.getProperty("tablePath"), configuration);
    requireNonNull(tableCacheModel, "tableCacheModel should not be null");
    requireNonNull(tableCacheModel.getCarbonTable(), "tableCacheModel.carbonTable should not be null");
    requireNonNull(tableCacheModel.getCarbonTable().getTableInfo(), "tableCacheModel.carbonTable.tableInfo should not be null");
    return tableCacheModel.getCarbonTable();
}
Also used: CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel), SchemaTableName(io.prestosql.spi.connector.SchemaTableName)
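
Examples 1 and 3 repeat the same three-step null-check chain. If the duplication ever becomes a maintenance concern, the chain collapses naturally into a single helper; a hypothetical sketch (the record types are simplified placeholders for the CarbonData classes, not hetu-core code):

import static java.util.Objects.requireNonNull;

// Hypothetical helper collapsing the null-check chain shared by Examples 1 and 3.
// TableInfo, CarbonTable, and TableCacheModel are simplified stand-ins for the
// org.apache.carbondata and hetu-core types.
public final class CacheValidation {
    record TableInfo() {}
    record CarbonTable(TableInfo tableInfo) {}
    record TableCacheModel(CarbonTable carbonTable) {}

    static CarbonTable validatedCarbonTable(TableCacheModel tableCacheModel) {
        requireNonNull(tableCacheModel, "tableCacheModel should not be null");
        CarbonTable carbonTable = requireNonNull(tableCacheModel.carbonTable(),
                "tableCacheModel.carbonTable should not be null");
        requireNonNull(carbonTable.tableInfo(),
                "tableCacheModel.carbonTable.tableInfo should not be null");
        return carbonTable;
    }

    private CacheValidation() {}
}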

Aggregations

CarbondataTableCacheModel (io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel): 3
SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 3
ImmutableList (com.google.common.collect.ImmutableList): 1
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 1
ImmutableMap (com.google.common.collect.ImmutableMap): 1
CarbondataLocalMultiBlockSplit (io.hetu.core.plugin.carbondata.impl.CarbondataLocalMultiBlockSplit): 1
HdfsEnvironment (io.prestosql.plugin.hive.HdfsEnvironment): 1
HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle): 1
HiveSplit (io.prestosql.plugin.hive.HiveSplit): 1
HiveTableHandle (io.prestosql.plugin.hive.HiveTableHandle): 1
HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 1
SemiTransactionalHiveMetastore (io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore): 1
Table (io.prestosql.plugin.hive.metastore.Table): 1
PrestoException (io.prestosql.spi.PrestoException): 1
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit): 1
FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource): 1
TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException): 1
TupleDomain (io.prestosql.spi.predicate.TupleDomain): 1
IOException (java.io.IOException): 1
ArrayList (java.util.ArrayList): 1