Search in sources :

Example 1 with HiveIdentity

Use of io.prestosql.plugin.hive.authentication.HiveIdentity in the Apache CarbonData project.

From the class CarbondataSplitManager, method getSplits.

/**
 * Produces the splits for a scan of the given table.
 *
 * <p>Tables whose input format name does not contain "carbon" are handed to the parent
 * implementation. For the remaining tables the table location is resolved, the selected
 * partitions (if any) are turned into {@link PartitionSpec}s, and every
 * {@link CarbonLocalMultiBlockSplit} returned by the carbon table reader is wrapped in a
 * {@link HiveSplit} whose schema properties carry the serialized carbon split.
 *
 * @param transactionHandle transaction used to obtain the metastore
 * @param session current session; supplies the query id and the identity for the metastore lookup
 * @param tableHandle handle of the table being scanned; cast to {@link HiveTableHandle}
 * @param splitSchedulingStrategy only forwarded to the parent implementation
 * @return a fixed split source holding one split per carbon multi-block split
 * @throws TableNotFoundException if the metastore has no such table
 * @throws RuntimeException wrapping any exception raised while computing the splits
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    HiveTableHandle hiveHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveHandle.getSchemaTableName();
    carbonTableReader.setPrestoQueryId(session.getQueryId());

    // Fetch the table metadata through the metastore bound to this transaction.
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metastore
            .getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    if (!table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        return super.getSplits(transactionHandle, session, tableHandle, splitSchedulingStrategy);
    }

    // The serde parameters are needed for the location, the configuration and every split.
    Map<String, String> serdeParameters = table.getStorage().getSerdeParameters();

    // With a hive metastore the location comes from the catalog table's tablePath;
    // with a file metastore tablePath can be missing, so use the storage location instead.
    String location = serdeParameters.get("tablePath");
    if (StringUtils.isEmpty(location)) {
        location = table.getStorage().getLocation();
    }

    // One PartitionSpec per partition selected on the handle, rooted at the table location.
    List<PartitionSpec> filteredPartitions = new ArrayList<>();
    if (!hiveHandle.getPartitionColumns().isEmpty() && hiveHandle.getPartitions().isPresent()) {
        List<String> partitionColumnNames = hiveHandle.getPartitionColumns().stream()
                .map(HiveColumnHandle::getName)
                .collect(Collectors.toList());
        for (HivePartition hivePartition : hiveHandle.getPartitions().get()) {
            String partitionLocation = location + CarbonCommonConstants.FILE_SEPARATOR + hivePartition.getPartitionId();
            filteredPartitions.add(new PartitionSpec(partitionColumnNames, partitionLocation));
        }
    }

    // Driver-side statistics are keyed by a locally generated id, not the Presto query id.
    String queryId = String.valueOf(System.nanoTime());
    QueryStatistic statistic = new QueryStatistic();
    QueryStatisticsRecorder statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    statistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
    statisticRecorder.recordStatisticsForDriver(statistic, queryId);
    // Fresh statistic object for the block-identification phase recorded after split creation.
    statistic = new QueryStatistic();
    carbonTableReader.setQueryId(queryId);

    TupleDomain<HiveColumnHandle> predicate = hiveHandle.getCompactEffectivePredicate();
    HdfsEnvironment.HdfsContext hdfsContext = new HdfsEnvironment.HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
    Configuration configuration = this.hdfsEnvironment.getConfiguration(hdfsContext, new Path(location));
    configuration = carbonTableReader.updateS3Properties(configuration);
    for (Map.Entry<String, String> serdeParameter : serdeParameters.entrySet()) {
        configuration.set(serdeParameter.getKey(), serdeParameter.getValue());
    }
    // Publish the hadoop configuration as a thread local so that FileFactory can use it.
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);

    CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(tableName, location, configuration);
    Expression filters = PrestoFilterUtil.parseFilterExpression(predicate);
    try {
        List<CarbonLocalMultiBlockSplit> carbonSplits = carbonTableReader.getInputSplits(cache, filters, filteredPartitions, configuration);
        ImmutableList.Builder<ConnectorSplit> connectorSplits = ImmutableList.builder();
        // Splits are numbered starting from 1.
        long index = 0;
        for (CarbonLocalMultiBlockSplit carbonSplit : carbonSplits) {
            index++;
            String tablePath = cache.getCarbonTable().getTablePath();

            // Each split carries its own copy of the serde parameters plus the carbon-specific keys.
            Properties splitSchema = new Properties();
            for (Map.Entry<String, String> serdeParameter : serdeParameters.entrySet()) {
                splitSchema.setProperty(serdeParameter.getKey(), serdeParameter.getValue());
            }
            splitSchema.setProperty("tablePath", tablePath);
            splitSchema.setProperty("carbonSplit", carbonSplit.getJsonString());
            splitSchema.setProperty("queryId", queryId);
            splitSchema.setProperty("index", String.valueOf(index));

            connectorSplits.add(new HiveSplit(
                    tableName.getSchemaName(),
                    tableName.getTableName(),
                    tableName.getTableName(),
                    tablePath,
                    0,
                    0,
                    0,
                    0,
                    splitSchema,
                    new ArrayList<>(),
                    getHostAddresses(carbonSplit.getLocations()),
                    OptionalInt.empty(),
                    false,
                    TableToPartitionMapping.empty(),
                    Optional.empty(),
                    false,
                    Optional.empty()));
        }
        statisticRecorder.logStatisticsAsTableDriver();
        statistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
        statisticRecorder.recordStatisticsForDriver(statistic, queryId);
        statisticRecorder.logStatisticsAsTableDriver();
        return new FixedSplitSource(connectorSplits.build());
    } catch (Exception ex) {
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Properties(java.util.Properties) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) Path(org.apache.hadoop.fs.Path) Table(io.prestosql.plugin.hive.metastore.Table) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) HiveSplit(io.prestosql.plugin.hive.HiveSplit) Expression(org.apache.carbondata.core.scan.expression.Expression) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) CarbonLocalMultiBlockSplit(org.apache.carbondata.presto.impl.CarbonLocalMultiBlockSplit) Map(java.util.Map) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) HivePartition(io.prestosql.plugin.hive.HivePartition)

Example 2 with HiveIdentity

Use of io.prestosql.plugin.hive.authentication.HiveIdentity in the Apache CarbonData project.

From the class CarbonDataPageSinkProvider, method createPageSink.

/**
 * Creates a {@link HivePageSink} whose writers come from a {@link CarbonDataWriterFactory}.
 *
 * @param handle writable table handle supplying schema, columns, storage formats and bucketing
 * @param session current session; also supplies the query id and the metastore identity
 * @param additionalTableParameters extra table parameters passed to the writer factory
 * @param additionalConf extra configuration entries passed to the writer factory
 * @param isCreateTable passed through to the writer factory unchanged
 * @return the page sink for the given handle
 */
private ConnectorPageSink createPageSink(HiveWritableTableHandle handle, ConnectorSession session, Map<String, String> additionalTableParameters, Map<String, String> additionalConf, boolean isCreateTable) {
    // Without a bucket property there is no bucket count and no sort order.
    boolean bucketed = handle.getBucketProperty().isPresent();
    OptionalInt bucketCount = bucketed
            ? OptionalInt.of(handle.getBucketProperty().get().getBucketCount())
            : OptionalInt.empty();
    List<SortingColumn> sortedBy = bucketed
            ? handle.getBucketProperty().get().getSortedBy()
            : ImmutableList.of();

    CarbonDataWriterFactory writerFactory = new CarbonDataWriterFactory(
            fileWriterFactories,
            handle.getSchemaName(),
            handle.getTableName(),
            isCreateTable,
            handle.getInputColumns(),
            handle.getTableStorageFormat(),
            handle.getPartitionStorageFormat(),
            additionalTableParameters,
            bucketCount,
            sortedBy,
            handle.getLocationHandle(),
            locationService,
            session.getQueryId(),
            new HivePageSinkMetadataProvider(
                    handle.getPageSinkMetadata(),
                    new HiveMetastoreClosure(memoizeMetastore(metastore, perTransactionMetastoreCacheMaximumSize)),
                    new HiveIdentity(session)),
            typeManager,
            hdfsEnvironment,
            pageSorter,
            writerSortBufferSize,
            maxOpenSortFiles,
            immutablePartitions,
            session,
            nodeManager,
            eventClient,
            hiveSessionProperties,
            hiveWriterStats,
            additionalConf);

    return new HivePageSink(
            writerFactory,
            handle.getInputColumns(),
            handle.getBucketProperty(),
            pageIndexerFactory,
            hdfsEnvironment,
            maxOpenPartitions,
            writeVerificationExecutor,
            partitionUpdateCodec,
            session);
}
Also used : HivePageSinkMetadataProvider(io.prestosql.plugin.hive.metastore.HivePageSinkMetadataProvider) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) HivePageSink(io.prestosql.plugin.hive.HivePageSink) OptionalInt(java.util.OptionalInt) HiveMetastoreClosure(io.prestosql.plugin.hive.HiveMetastoreClosure) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity)

Example 3 with HiveIdentity

Use of io.prestosql.plugin.hive.authentication.HiveIdentity in the Apache CarbonData project.

From the class CarbonDataMetaData, method beginInsert.

/**
 * Starts an insert into a carbon table.
 *
 * <p>Delegates to the parent implementation for the Hive insert handle, then builds a
 * {@link CarbonLoadModel} from the table's Hive schema, stores it in the job configuration,
 * sets up a {@link MapredCarbonOutputCommitter} for the job, and returns a
 * {@link CarbonDataInsertTableHandle} that carries the serialized load model.
 *
 * <p>Side effects: assigns the {@code carbonOutputCommitter} and {@code jobContext} fields and
 * publishes the job configuration to the current thread via {@link ThreadLocalSessionInfo}.
 *
 * @param session current session; supplies the metastore identity and the HDFS context
 * @param tableHandle handle of the table to insert into
 * @return the carbon insert handle derived from the Hive insert handle
 * @throws IllegalStateException if the metastore does not return the table
 * @throws RuntimeException if building the load model or setting up the committer fails with
 *         an {@link IOException}; the original exception is kept as the cause
 */
@Override
public CarbonDataInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle) {
    HiveInsertTableHandle hiveInsertTableHandle = super.beginInsert(session, tableHandle);
    SchemaTableName tableName = hiveInsertTableHandle.getSchemaTableName();
    // Fail at the lookup with a descriptive message instead of a bare Optional.get() later on.
    Table table = this.metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new IllegalStateException("Table not found: " + tableName));
    Path outputPath = new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableTargetPath());
    JobConf jobConf = ConfigurationUtils.toJobConf(this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, hiveInsertTableHandle.getSchemaName(), hiveInsertTableHandle.getTableName()), new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableWritePath())));
    jobConf.set("location", outputPath.toString());
    Properties hiveSchema = MetastoreUtil.getHiveSchema(table);
    try {
        CarbonLoadModel carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, jobConf);
        CarbonTableOutputFormat.setLoadModel(jobConf, carbonLoadModel);
    } catch (IOException ex) {
        LOG.error("Error while creating carbon load model", ex);
        throw new RuntimeException(ex);
    }
    try {
        carbonOutputCommitter = new MapredCarbonOutputCommitter();
        jobContext = new JobContextImpl(jobConf, new JobID());
        carbonOutputCommitter.setupJob(jobContext);
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(jobConf);
    } catch (IOException e) {
        LOG.error("error setting the output committer", e);
        // Keep the IOException as the cause so callers can see why the setup failed.
        throw new RuntimeException("error setting the output committer", e);
    }
    // The load model stored in the job configuration above travels with the insert handle.
    return new CarbonDataInsertTableHandle(hiveInsertTableHandle.getSchemaTableName().getSchemaName(), hiveInsertTableHandle.getTableName(), hiveInsertTableHandle.getInputColumns(), hiveInsertTableHandle.getPageSinkMetadata(), hiveInsertTableHandle.getLocationHandle(), hiveInsertTableHandle.getBucketProperty(), hiveInsertTableHandle.getTableStorageFormat(), hiveInsertTableHandle.getPartitionStorageFormat(), ImmutableMap.of(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL, jobContext.getConfiguration().get(CarbonTableOutputFormat.LOAD_MODEL)));
}
Also used : Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Table(io.prestosql.plugin.hive.metastore.Table) IOException(java.io.IOException) Properties(java.util.Properties) MapredCarbonOutputCommitter(org.apache.carbondata.hive.MapredCarbonOutputCommitter) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) HiveInsertTableHandle(io.prestosql.plugin.hive.HiveInsertTableHandle) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapred.JobID)

Aggregations

HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)3 Table (io.prestosql.plugin.hive.metastore.Table)2 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)2 Properties (java.util.Properties)2 Path (org.apache.hadoop.fs.Path)2 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 HdfsEnvironment (io.prestosql.plugin.hive.HdfsEnvironment)1 HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle)1 HiveInsertTableHandle (io.prestosql.plugin.hive.HiveInsertTableHandle)1 HiveMetastoreClosure (io.prestosql.plugin.hive.HiveMetastoreClosure)1 HivePageSink (io.prestosql.plugin.hive.HivePageSink)1 HivePartition (io.prestosql.plugin.hive.HivePartition)1 HiveSplit (io.prestosql.plugin.hive.HiveSplit)1 HiveTableHandle (io.prestosql.plugin.hive.HiveTableHandle)1 HivePageSinkMetadataProvider (io.prestosql.plugin.hive.metastore.HivePageSinkMetadataProvider)1 SemiTransactionalHiveMetastore (io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore)1 SortingColumn (io.prestosql.plugin.hive.metastore.SortingColumn)1 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)1 FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource)1