Search in sources:

Example 1 with QueryType

Use of io.prestosql.spi.resourcegroups.QueryType in project hetu-core by openlookeng.

In class HiveSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        Supplier<List<Set<DynamicFilter>>> dynamicFilterSupplier,
        Optional<QueryType> queryType,
        Map<String, Object> queryInfo,
        Set<TupleDomain<ColumnMetadata>> userDefinedCachePredicates,
        boolean partOfReuse) {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();
    // get table metadata
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transaction);
    Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    if (table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        throw new PrestoException(NOT_SUPPORTED, "Hive connector can't read carbondata tables");
    }
    // verify table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }
    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(session, metastore, new HiveIdentity(session), hiveTable);
    // short circuit if we don't have any partitions
    if (partitions.isEmpty()) {
        return new FixedSplitSource(ImmutableList.of());
    }
    // get buckets from first partition (arbitrary)
    Optional<HiveBucketing.HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && !bucketHandle.isPresent()) {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            splitLoaderConcurrency,
            recursiveDfsWalkerEnabled,
            metastore.getValidWriteIds(session, hiveTable, queryType.map(t -> t == QueryType.VACUUM).orElse(false))
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            dynamicFilterSupplier,
            queryType,
            queryInfo,
            typeManager);
    HiveSplitSource splitSource;
    HiveStorageFormat hiveStorageFormat = HiveMetadata.extractHiveStorageFormat(table);
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            // For reuse, we should make sure to have the same split size at all times for a table.
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(),
                    partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize,
                    maxSplitsPerSecond, hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier,
                    userDefinedCachePredicates, typeManager, hiveConfig, hiveStorageFormat);
            break;
        case GROUPED_SCHEDULING:
            // For reuse, we should make sure to have the same split size at all times for a table.
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(),
                    partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize,
                    maxSplitsPerSecond, hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier,
                    userDefinedCachePredicates, typeManager, hiveConfig, hiveStorageFormat);
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);
    if (queryType.isPresent() && queryType.get() == QueryType.VACUUM) {
        HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName());
        return new HiveVacuumSplitSource(splitSource, (HiveVacuumTableHandle) queryInfo.get("vacuumHandle"), hdfsEnvironment, hdfsContext, session);
    }
    return splitSource;
}
Also used: VersionEmbedder(io.prestosql.spi.VersionEmbedder) GROUPED_SCHEDULING(io.prestosql.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING) Iterables.transform(com.google.common.collect.Iterables.transform) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) QueryType(io.prestosql.spi.resourcegroups.QueryType) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) ConnectorSplitManager(io.prestosql.spi.connector.ConnectorSplitManager) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) SERVER_SHUTTING_DOWN(io.prestosql.spi.StandardErrorCode.SERVER_SHUTTING_DOWN) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BoundedExecutor(io.airlift.concurrent.BoundedExecutor) Iterables.concat(com.google.common.collect.Iterables.concat) Map(java.util.Map) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) Math.min(java.lang.Math.min) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) Table(io.prestosql.plugin.hive.metastore.Table) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) Nested(org.weakref.jmx.Nested) Partition(io.prestosql.plugin.hive.metastore.Partition) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) CounterStat(io.airlift.stats.CounterStat) Function(java.util.function.Function) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Managed(org.weakref.jmx.Managed) Objects.requireNonNull(java.util.Objects.requireNonNull) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) AbstractIterator(com.google.common.collect.AbstractIterator) TypeManager(io.prestosql.spi.type.TypeManager) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Ordering(com.google.common.collect.Ordering) Column(io.prestosql.plugin.hive.metastore.Column)
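
The Optional<QueryType> parameter does double duty in getSplits: it switches getValidWriteIds into vacuum mode and, at the end, wraps the split source in a HiveVacuumSplitSource. A minimal sketch of that gating logic, extracted for illustration (the class and helper names are illustrative, not part of hetu-core):

import java.util.Optional;

import io.prestosql.spi.resourcegroups.QueryType;

final class QueryTypeChecks
{
    private QueryTypeChecks() {}

    // Mirrors the expression used twice in getSplits above:
    // queryType.map(t -> t == QueryType.VACUUM).orElse(false)
    static boolean isVacuum(Optional<QueryType> queryType)
    {
        return queryType.map(t -> t == QueryType.VACUUM).orElse(false);
    }
}

Factoring the check this way also makes the empty-Optional case explicit: when no QueryType is attached to the query, the split manager behaves as for an ordinary read.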

Example 2 with QueryType

Use of io.prestosql.spi.resourcegroups.QueryType in project hetu-core by openlookeng.

In class QueryPreparer, method prepareQuery:

public PreparedQuery prepareQuery(Session session, Statement wrappedStatement) throws ParsingException, PrestoException, SemanticException {
    Statement statement = wrappedStatement;
    Optional<String> prepareSql = Optional.empty();
    if (statement instanceof Execute) {
        prepareSql = Optional.of(session.getPreparedStatementFromExecute((Execute) statement));
        statement = sqlParser.createStatement(prepareSql.get(), createParsingOptions(session));
    }
    if (statement instanceof Explain && ((Explain) statement).isAnalyze()) {
        Statement innerStatement = ((Explain) statement).getStatement();
        Optional<QueryType> innerQueryType = StatementUtils.getQueryType(innerStatement.getClass());
        if (!innerQueryType.isPresent() || innerQueryType.get() == QueryType.DATA_DEFINITION) {
            throw new PrestoException(NOT_SUPPORTED, "EXPLAIN ANALYZE doesn't support statement type: " + innerStatement.getClass().getSimpleName());
        }
    }
    List<Expression> parameters = ImmutableList.of();
    if (wrappedStatement instanceof Execute) {
        parameters = ((Execute) wrappedStatement).getParameters();
    }
    validateParameters(statement, parameters);
    return new PreparedQuery(statement, parameters, prepareSql);
}
Also used: Execute(io.prestosql.sql.tree.Execute) Expression(io.prestosql.sql.tree.Expression) Statement(io.prestosql.sql.tree.Statement) Explain(io.prestosql.sql.tree.Explain) PrestoException(io.prestosql.spi.PrestoException) QueryType(io.prestosql.spi.resourcegroups.QueryType)
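
prepareQuery rejects EXPLAIN ANALYZE when the inner statement has no mapped QueryType or maps to DATA_DEFINITION. A hedged sketch of that guard as a standalone predicate (assuming StatementUtils lives in io.prestosql.util, as in upstream Presto; the class and method names here are hypothetical):

import java.util.Optional;

import io.prestosql.spi.resourcegroups.QueryType;
import io.prestosql.sql.tree.Statement;
import io.prestosql.util.StatementUtils;

final class ExplainAnalyzeSupport
{
    private ExplainAnalyzeSupport() {}

    // StatementUtils.getQueryType maps each Statement subclass to a QueryType;
    // statements with no mapping, and DDL statements, cannot be EXPLAIN ANALYZEd
    static boolean supportsExplainAnalyze(Statement statement)
    {
        Optional<QueryType> queryType = StatementUtils.getQueryType(statement.getClass());
        return queryType.isPresent() && queryType.get() != QueryType.DATA_DEFINITION;
    }
}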

Example 3 with QueryType

Use of io.prestosql.spi.resourcegroups.QueryType in project hetu-core by openlookeng.

In class TestBackgroundHiveSplitLoader, method testAbortedTransactions:

private void testAbortedTransactions(ImmutableMap<String, String> tableParameters, Optional<QueryType> queryType, ImmutableMap<String, Object> queryInfo) throws Exception {
    java.nio.file.Path tablePath = Files.createTempDirectory(UUID.randomUUID().toString());
    Table table = table(tablePath.toString(), ImmutableList.of(), Optional.empty(), tableParameters);
    List<String> filePaths = ImmutableList.of(
            tablePath + "/delta_0000001_0000001_0000/_orc_acid_version",
            tablePath + "/delta_0000001_0000001_0000/bucket_00000",
            tablePath + "/delta_0000002_0000002_0000/_orc_acid_version",
            tablePath + "/delta_0000002_0000002_0000/bucket_00000",
            tablePath + "/delta_0000003_0000003_0000/_orc_acid_version",
            tablePath + "/delta_0000003_0000003_0000/bucket_00000");
    try {
        for (String path : filePaths) {
            File file = new File(path);
            assertTrue(file.getParentFile().exists() || file.getParentFile().mkdirs(), "Failed creating directory " + file.getParentFile());
            createOrcAcidFile(file);
        }
        // ValidWriteIdList is of format <currentTxn>$<schema>.<table>:<highWatermark>:<minOpenWriteId>::<AbortedTxns>
        // This writeId list has high watermark transaction=3 and aborted transaction=2
        String validWriteIdsList = format("4$%s.%s:3:9223372036854775807::2", table.getDatabaseName(), table.getTableName());
        BackgroundHiveSplitLoader backgroundHiveSplitLoader = backgroundHiveSplitLoader(
                createTestHdfsEnvironment(new HiveConfig()),
                TupleDomain.none(),
                Optional.empty(),
                table,
                Optional.empty(),
                Optional.of(new ValidReaderWriteIdList(validWriteIdsList)),
                queryType,
                queryInfo);
        HiveSplitSource hiveSplitSource = hiveSplitSource(backgroundHiveSplitLoader);
        backgroundHiveSplitLoader.start(hiveSplitSource);
        List<String> splits = drain(hiveSplitSource);
        assertTrue(splits.stream().anyMatch(p -> p.contains(filePaths.get(1))), format("%s not found in splits %s", filePaths.get(1), splits));
        assertTrue(splits.stream().anyMatch(p -> p.contains(filePaths.get(5))), format("%s not found in splits %s", filePaths.get(5), splits));
        assertFalse(splits.stream().anyMatch(p -> p.contains(filePaths.get(3))), format("Aborted txn %s found in splits %s", filePaths.get(3), splits));
    } finally {
        Files.walk(tablePath).sorted(Comparator.reverseOrder()).map(java.nio.file.Path::toFile).forEach(File::delete);
    }
}
Also used: StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) Progressable(org.apache.hadoop.util.Progressable) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) QueryType(io.prestosql.spi.resourcegroups.QueryType) BlockLocation(org.apache.hadoop.fs.BlockLocation) FileSystem(org.apache.hadoop.fs.FileSystem) NoHdfsAuthentication(io.prestosql.plugin.hive.authentication.NoHdfsAuthentication) Test(org.testng.annotations.Test) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) FsPermission(org.apache.hadoop.fs.permission.FsPermission) Duration(io.airlift.units.Duration) Future(java.util.concurrent.Future) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) Path(org.apache.hadoop.fs.Path) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) URI(java.net.URI) ValidReaderWriteIdList(org.apache.hadoop.hive.common.ValidReaderWriteIdList) Assert.assertFalse(org.testng.Assert.assertFalse) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) HIVE_INT(io.prestosql.plugin.hive.HiveType.HIVE_INT) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) HiveTestUtils.createTestHdfsEnvironment(io.prestosql.plugin.hive.HiveTestUtils.createTestHdfsEnvironment) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) UUID(java.util.UUID) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) CountDownLatch(java.util.concurrent.CountDownLatch) DataSize(io.airlift.units.DataSize) List(java.util.List) Table(io.prestosql.plugin.hive.metastore.Table) HiveColumnHandle.pathColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.pathColumnHandle) Domain(io.prestosql.spi.predicate.Domain) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) NOT_PARTITIONED(io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) Partition(io.prestosql.plugin.hive.metastore.Partition) DataProvider(org.testng.annotations.DataProvider) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Assert.assertEquals(org.testng.Assert.assertEquals) CounterStat(io.airlift.stats.CounterStat) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) ColumnType(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType) HiveUtil.getRegularColumnHandles(io.prestosql.plugin.hive.HiveUtil.getRegularColumnHandles) OptionalInt(java.util.OptionalInt) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveTestUtils.createTestDynamicFilterSupplier(io.prestosql.plugin.hive.HiveTestUtils.createTestDynamicFilterSupplier) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) GIGABYTE(io.airlift.units.DataSize.Unit.GIGABYTE) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Assert.assertThrows(org.testng.Assert.assertThrows) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) ExecutorService(java.util.concurrent.ExecutorService) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) Iterator(java.util.Iterator) Files(java.nio.file.Files) UTF_8(java.nio.charset.StandardCharsets.UTF_8) TupleDomain(io.prestosql.spi.predicate.TupleDomain) AbstractIterator(com.google.common.collect.AbstractIterator) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) BUCKETING_V1(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion.BUCKETING_V1) Storage(io.prestosql.plugin.hive.metastore.Storage) Paths(java.nio.file.Paths) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) Column(io.prestosql.plugin.hive.metastore.Column) MoreFiles(com.google.common.io.MoreFiles) TableType(org.apache.hadoop.hive.metastore.TableType) Assert.assertTrue(org.testng.Assert.assertTrue) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) Comparator(java.util.Comparator) Collections(java.util.Collections)
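
The writeId string in the test drives which delta directories survive split loading. A small sketch of how ValidReaderWriteIdList interprets that string, using the standard Hive ACID API; the table name "default.test" is a placeholder, and the string mirrors the format the test passes to the same constructor:

import org.apache.hadoop.hive.common.ValidReaderWriteIdList;

public class WriteIdListDemo
{
    public static void main(String[] args)
    {
        // Same shape as the test string: high watermark 3, no open writeIds, writeId 2 aborted
        ValidReaderWriteIdList writeIds =
                new ValidReaderWriteIdList("4$default.test:3:9223372036854775807::2");

        System.out.println(writeIds.getHighWatermark());  // expected: 3
        System.out.println(writeIds.isWriteIdValid(1));   // expected: true  -> delta_0000001 is read
        System.out.println(writeIds.isWriteIdAborted(2)); // expected: true  -> delta_0000002 is skipped
    }
}

This is exactly why the assertions above expect splits for delta_0000001 and delta_0000003 but none for the aborted delta_0000002.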

Aggregations

PrestoException (io.prestosql.spi.PrestoException): 3 usages
QueryType (io.prestosql.spi.resourcegroups.QueryType): 3 usages
AbstractIterator (com.google.common.collect.AbstractIterator): 2 usages
ImmutableList (com.google.common.collect.ImmutableList): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 2 usages
CounterStat (io.airlift.stats.CounterStat): 2 usages
DataSize (io.airlift.units.DataSize): 2 usages
Column (io.prestosql.plugin.hive.metastore.Column): 2 usages
Partition (io.prestosql.plugin.hive.metastore.Partition): 2 usages
Table (io.prestosql.plugin.hive.metastore.Table): 2 usages
NOT_SUPPORTED (io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED): 2 usages
ConnectorSession (io.prestosql.spi.connector.ConnectorSession): 2 usages
SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 2 usages
TupleDomain (io.prestosql.spi.predicate.TupleDomain): 2 usages
MoreObjects.firstNonNull (com.google.common.base.MoreObjects.firstNonNull): 1 usage
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 1 usage
Preconditions.checkState (com.google.common.base.Preconditions.checkState): 1 usage
Strings.isNullOrEmpty (com.google.common.base.Strings.isNullOrEmpty): 1 usage
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 1 usage