Search in sources :

Example 6 with HivePartition

use of com.facebook.presto.hive.HivePartition in project presto by prestodb.

In the class MetastoreHiveStatisticsProvider, the method getPartitionsSample:

/**
 * Selects a deterministic sample of at most {@code sampleSize} partitions.
 *
 * <p>When the input already fits into the sample it is returned as-is (the very same list
 * instance). Otherwise the sample is assembled in this order: the partition with the smallest
 * partition id, then (if room remains) the one with the largest partition id, then the
 * remaining slots are filled with the not-yet-selected partitions ordered by the murmur3-128
 * hash of their partition id, ties broken by the partition id itself.
 *
 * @param partitions candidate partitions
 * @param sampleSize maximum number of partitions to return; must be greater than zero
 * @return {@code partitions} itself when it has no more than {@code sampleSize} elements,
 *         otherwise an unmodifiable list holding the sample
 * @throws IllegalArgumentException if {@code sampleSize} is not positive
 */
@VisibleForTesting
static List<HivePartition> getPartitionsSample(List<HivePartition> partitions, int sampleSize) {
    checkArgument(sampleSize > 0, "sampleSize is expected to be greater than zero");
    if (partitions.size() <= sampleSize) {
        return partitions;
    }

    // Single pass to locate the extremes by partition id; the first occurrence wins on ties.
    HivePartition lowest = partitions.get(0);
    HivePartition highest = partitions.get(0);
    for (HivePartition candidate : partitions) {
        if (candidate.getPartitionId().compareTo(lowest.getPartitionId()) < 0) {
            lowest = candidate;
            continue;
        }
        if (candidate.getPartitionId().compareTo(highest.getPartitionId()) > 0) {
            highest = candidate;
        }
    }

    List<HivePartition> sample = new ArrayList<>();
    sample.add(lowest);
    if (sampleSize > 1) {
        sample.add(highest);
    }

    int remaining = sampleSize - sample.size();
    if (remaining > 0) {
        HashFunction hasher = murmur3_128();
        Comparator<Map.Entry<HivePartition, Long>> byHashThenId = Comparator
                .<Map.Entry<HivePartition, Long>, Long>comparing(Map.Entry::getValue)
                .thenComparing(entry -> entry.getKey().getPartitionId());

        // Pair every partition that is not already in the sample with the hash of its id.
        List<Map.Entry<HivePartition, Long>> hashed = new ArrayList<>();
        for (HivePartition candidate : partitions) {
            if (sample.contains(candidate)) {
                continue;
            }
            long hash = hasher.hashUnencodedChars(candidate.getPartitionId()).asLong();
            hashed.add(immutableEntry(candidate, hash));
        }

        // Stable sort, then take the leading entries to fill the free slots.
        hashed.sort(byHashThenId);
        int take = Math.min(remaining, hashed.size());
        for (Map.Entry<HivePartition, Long> entry : hashed.subList(0, take)) {
            sample.add(entry.getKey());
        }
    }
    return unmodifiableList(sample);
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) Collections.unmodifiableList(java.util.Collections.unmodifiableList) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveSessionProperties.isStatisticsEnabled(com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled) BigDecimal(java.math.BigDecimal) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) Double.parseDouble(java.lang.Double.parseDouble) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) List(java.util.List) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) HiveSessionProperties.isIgnoreCorruptedStatistics(com.facebook.presto.hive.HiveSessionProperties.isIgnoreCorruptedStatistics) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) LocalDate(java.time.LocalDate) 
MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashFunction(com.google.common.hash.HashFunction) Logger(com.facebook.airlift.log.Logger) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) OptionalDouble(java.util.OptionalDouble) Shorts(com.google.common.primitives.Shorts) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) OptionalLong(java.util.OptionalLong) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Double.isFinite(java.lang.Double.isFinite) HIVE_CORRUPTED_COLUMN_STATISTICS(com.facebook.presto.hive.HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) SignedBytes(com.google.common.primitives.SignedBytes) Decimals(com.facebook.presto.common.type.Decimals) Hashing.murmur3_128(com.google.common.hash.Hashing.murmur3_128) Ints(com.google.common.primitives.Ints) HiveSessionProperties.getPartitionStatisticsSampleSize(com.facebook.presto.hive.HiveSessionProperties.getPartitionStatisticsSampleSize) 
HivePartition(com.facebook.presto.hive.HivePartition) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Double.isNaN(java.lang.Double.isNaN) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Estimate(com.facebook.presto.spi.statistics.Estimate) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) HashFunction(com.google.common.hash.HashFunction) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HivePartition(com.facebook.presto.hive.HivePartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 7 with HivePartition

use of com.facebook.presto.hive.HivePartition in project carbondata by apache.

In the class CarbondataSplitManager, the method getSplits:

/**
 * Produces the split source for a table layout.
 *
 * <p>Tables whose storage input format name does not contain {@code "carbon"} are delegated to
 * {@code super.getSplits}. For all other tables the method resolves the table location, builds
 * a Hadoop configuration (including the table's serde parameters), asks the
 * {@code carbonTableReader} for the input splits matching the layout's predicate and
 * partitions, and wraps each returned split in a {@code HiveSplit}.
 *
 * @param transactionHandle transaction handle; cast to {@code HiveTransactionHandle}
 * @param session session supplying the Presto query id and the HDFS context
 * @param layoutHandle table layout; cast to {@code HiveTableLayoutHandle}
 * @param splitSchedulingStrategy only forwarded to {@code super.getSplits}
 * @return a {@code FixedSplitSource} over the generated splits, or the result of
 *         {@code super.getSplits} for tables not handled here
 * @throws TableNotFoundException if the metastore has no such table
 * @throws RuntimeException wrapping any exception raised while computing the splits
 */
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layoutHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    HiveTableLayoutHandle layout = (HiveTableLayoutHandle) layoutHandle;
    SchemaTableName schemaTableName = layout.getSchemaTableName();
    carbonTableReader.setPrestoQueryId(session.getQueryId());
    // get table metadata
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    // Input formats without "carbon" in their name are handled by the parent implementation.
    if (!table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        return super.getSplits(transactionHandle, session, layoutHandle, splitSchedulingStrategy);
    }
    // for hive metastore, get table location from catalog table's tablePath
    String location = table.getStorage().getSerdeParameters().get("tablePath");
    if (StringUtils.isEmpty(location)) {
        // file metastore case tablePath can be null, so get from location
        location = table.getStorage().getLocation();
    }
    // One PartitionSpec per partition in the layout, located at <location><separator><partitionId>.
    // The list stays empty when there are no partition columns or no partitions in the layout.
    List<PartitionSpec> filteredPartitions = new ArrayList<>();
    if (layout.getPartitionColumns().size() > 0 && layout.getPartitions().isPresent()) {
        List<String> colNames = layout.getPartitionColumns().stream().map(x -> ((HiveColumnHandle) x).getName()).collect(Collectors.toList());
        for (HivePartition partition : layout.getPartitions().get()) {
            filteredPartitions.add(new PartitionSpec(colNames, location + CarbonCommonConstants.FILE_SEPARATOR + partition.getPartitionId()));
        }
    }
    // Locally generated id (distinct from the Presto query id set above) used to key the
    // statistics below and stamped on every split.
    String queryId = System.nanoTime() + "";
    QueryStatistic statistic = new QueryStatistic();
    QueryStatisticsRecorder statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    statistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
    statisticRecorder.recordStatisticsForDriver(statistic, queryId);
    // Fresh statistic object; it is filled with BLOCK_IDENTIFICATION after the splits are built.
    statistic = new QueryStatistic();
    carbonTableReader.setQueryId(queryId);
    // NOTE(review): unchecked cast of the layout's compact effective predicate — confirm its declared type.
    TupleDomain<HiveColumnHandle> predicate = (TupleDomain<HiveColumnHandle>) layout.getCompactEffectivePredicate();
    Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, schemaTableName.getSchemaName(), schemaTableName.getTableName()), new Path(location));
    configuration = carbonTableReader.updateS3Properties(configuration);
    // Copy every serde parameter of the table into the Hadoop configuration.
    for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
        configuration.set(entry.getKey(), entry.getValue());
    }
    // set the hadoop configuration to thread local, so that FileFactory can use it.
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
    CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(schemaTableName, location, configuration);
    Expression filters = PrestoFilterUtil.parseFilterExpression(predicate);
    try {
        List<CarbonLocalMultiBlockSplit> splits = carbonTableReader.getInputSplits(cache, filters, filteredPartitions, configuration);
        ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
        // Incremented before use, so the "index" property of the first split is 1.
        long index = 0;
        for (CarbonLocalMultiBlockSplit split : splits) {
            index++;
            // Each split gets its own Properties: the serde parameters plus the table path,
            // the split's JSON form, the local query id and the split index.
            Properties properties = new Properties();
            for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                properties.setProperty(entry.getKey(), entry.getValue());
            }
            properties.setProperty("tablePath", cache.getCarbonTable().getTablePath());
            properties.setProperty("carbonSplit", split.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            // NOTE(review): the table name is passed twice and `new ArrayList()` is a raw type;
            // the meaning of the positional arguments depends on the HiveSplit constructor,
            // which is not visible here — confirm against its signature.
            cSplits.add(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(), schemaTableName.getTableName(), "", 0, 0, 0, properties, new ArrayList(), getHostAddresses(split.getLocations()), OptionalInt.empty(), false, predicate, new HashMap<>(), Optional.empty(), false));
        }
        // NOTE(review): logStatisticsAsTableDriver() is called both before and after the
        // BLOCK_IDENTIFICATION statistic is recorded — confirm the first call is intentional.
        statisticRecorder.logStatisticsAsTableDriver();
        statistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
        statisticRecorder.recordStatisticsForDriver(statistic, queryId);
        statisticRecorder.logStatisticsAsTableDriver();
        return new FixedSplitSource(cSplits.build());
    } catch (Exception ex) {
        // Any failure, checked or unchecked, is rethrown as a RuntimeException with the cause kept.
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Arrays(java.util.Arrays) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) CarbonLocalMultiBlockSplit(org.apache.carbondata.presto.impl.CarbonLocalMultiBlockSplit) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) StringUtils(org.apache.commons.lang3.StringUtils) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) CarbonTableReader(org.apache.carbondata.presto.impl.CarbonTableReader) HiveTransactionHandle(com.facebook.presto.hive.HiveTransactionHandle) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) ForHiveClient(com.facebook.presto.hive.ForHiveClient) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) Expression(org.apache.carbondata.core.scan.expression.Expression) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HostAddress(com.facebook.presto.spi.HostAddress) DirectoryLister(com.facebook.presto.hive.DirectoryLister) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Collectors(java.util.stream.Collectors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) List(java.util.List) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) Table(com.facebook.presto.hive.metastore.Table) HiveSplitManager(com.facebook.presto.hive.HiveSplitManager) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) 
HashMap(java.util.HashMap) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) ThreadLocalSessionInfo(org.apache.carbondata.core.util.ThreadLocalSessionInfo) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) CarbonTimeStatisticsFactory(org.apache.carbondata.core.util.CarbonTimeStatisticsFactory) NamenodeStats(com.facebook.presto.hive.NamenodeStats) ExecutorService(java.util.concurrent.ExecutorService) Properties(java.util.Properties) CoercionPolicy(com.facebook.presto.hive.CoercionPolicy) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) QueryStatisticsConstants(org.apache.carbondata.core.stats.QueryStatisticsConstants) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HivePartition(com.facebook.presto.hive.HivePartition) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnHandle(com.facebook.presto.spi.ColumnHandle) HiveSplit(com.facebook.presto.hive.HiveSplit) Configuration(org.apache.hadoop.conf.Configuration) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Properties(java.util.Properties) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) Path(org.apache.hadoop.fs.Path) Table(com.facebook.presto.hive.metastore.Table) 
SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) HiveSplit(com.facebook.presto.hive.HiveSplit) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Expression(org.apache.carbondata.core.scan.expression.Expression) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) CarbonLocalMultiBlockSplit(org.apache.carbondata.presto.impl.CarbonLocalMultiBlockSplit) Map(java.util.Map) HashMap(java.util.HashMap) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HivePartition(com.facebook.presto.hive.HivePartition)

Aggregations

HivePartition (com.facebook.presto.hive.HivePartition)7 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)4 NullableValue (com.facebook.presto.common.predicate.NullableValue)3 HiveBasicStatistics (com.facebook.presto.hive.HiveBasicStatistics)3 Logger (com.facebook.airlift.log.Logger)2 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)2 Chars.isCharType (com.facebook.presto.common.type.Chars.isCharType)2 DATE (com.facebook.presto.common.type.DateType.DATE)2 DecimalType (com.facebook.presto.common.type.DecimalType)2 Decimals (com.facebook.presto.common.type.Decimals)2 Decimals.isLongDecimal (com.facebook.presto.common.type.Decimals.isLongDecimal)2 Decimals.isShortDecimal (com.facebook.presto.common.type.Decimals.isShortDecimal)2 DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE)2 INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER)2 REAL (com.facebook.presto.common.type.RealType.REAL)2 SMALLINT (com.facebook.presto.common.type.SmallintType.SMALLINT)2 TINYINT (com.facebook.presto.common.type.TinyintType.TINYINT)2 Type (com.facebook.presto.common.type.Type)2 Varchars.isVarcharType (com.facebook.presto.common.type.Varchars.isVarcharType)2 HiveClientConfig (com.facebook.presto.hive.HiveClientConfig)2