Search in sources :

Example 16 with Storage

use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.

the class StoragePartitionLoader method loadPartition.

@Override
public ListenableFuture<?> loadPartition(HivePartitionMetadata partition, HiveSplitSource hiveSplitSource, boolean stopped) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Storage storage = partition.getPartition().map(Partition::getStorage).orElse(table.getStorage());
    Properties schema = getPartitionSchema(table, partition.getPartition());
    String inputFormatName = storage.getStorageFormat().getInputFormat();
    int partitionDataColumnCount = partition.getPartition().map(p -> p.getColumns().size()).orElse(table.getDataColumns().size());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition(), partitionName);
    String location = getPartitionLocation(table, partition.getPartition());
    if (location.isEmpty()) {
        checkState(!shouldCreateFilesForMissingBuckets(table, session), "Empty location is only allowed for empty temporary table when zero-row file is not created");
        return COMPLETED_FUTURE;
    }
    Path path = new Path(location);
    Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, inputFormatName, false);
    ExtendedFileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
    boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(), partition.getPartition());
    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }
        // TODO: This should use an iterator like the HiveFileIterator
        ListenableFuture<?> lastResult = COMPLETED_FUTURE;
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // the splits must be generated using the file system for the target path
            // get the configuration for the target path -- it may be a different hdfs instance
            ExtendedFileSystem targetFilesystem = hdfsEnvironment.getFileSystem(hdfsContext, targetPath);
            JobConf targetJob = toJobConf(targetFilesystem.getConf());
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);
            InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(targetFilesystem, inputFormat, pathDomain, getNodeSelectionStrategy(session), getMaxInitialSplitSize(session), s3SelectPushdownEnabled, new HiveSplitPartitionInfo(storage, path.toUri(), partitionKeys, partitionName, partitionDataColumnCount, partition.getTableToPartitionMapping(), Optional.empty(), partition.getRedundantColumnDomains()), schedulerUsesHostAddresses, partition.getEncryptionInformation());
            lastResult = addSplitsToSource(targetSplits, splitFactory, hiveSplitSource, stopped);
            if (stopped) {
                return COMPLETED_FUTURE;
            }
        }
        return lastResult;
    }
    Optional<HiveSplit.BucketConversion> bucketConversion = Optional.empty();
    boolean bucketConversionRequiresWorkerParticipation = false;
    if (partition.getPartition().isPresent()) {
        Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage().getBucketProperty();
        if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
            int tableBucketCount = tableBucketInfo.get().getTableBucketCount();
            int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
            // Here, it's just trying to see if its needs the BucketConversion.
            if (tableBucketCount != partitionBucketCount) {
                bucketConversion = Optional.of(new HiveSplit.BucketConversion(tableBucketCount, partitionBucketCount, tableBucketInfo.get().getBucketColumns()));
                if (tableBucketCount > partitionBucketCount) {
                    bucketConversionRequiresWorkerParticipation = true;
                }
            }
        }
    }
    InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(fs, inputFormat, pathDomain, getNodeSelectionStrategy(session), getMaxInitialSplitSize(session), s3SelectPushdownEnabled, new HiveSplitPartitionInfo(storage, path.toUri(), partitionKeys, partitionName, partitionDataColumnCount, partition.getTableToPartitionMapping(), bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(), partition.getRedundantColumnDomains()), schedulerUsesHostAddresses, partition.getEncryptionInformation());
    if (shouldUseFileSplitsFromInputFormat(inputFormat, configuration, table.getStorage().getLocation())) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
        }
        JobConf jobConf = toJobConf(configuration);
        FileInputFormat.setInputPaths(jobConf, path);
        // SerDes parameters and Table parameters passing into input format
        fromProperties(schema).forEach(jobConf::set);
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        return addSplitsToSource(splits, splitFactory, hiveSplitSource, stopped);
    }
    PathFilter pathFilter = isHudiParquetInputFormat(inputFormat) ? hoodiePathFilterLoadingCache.getUnchecked(configuration) : path1 -> true;
    // Streaming aggregation works at the granularity of individual files
    // S3 Select pushdown works at the granularity of individual S3 objects,
    // Partial aggregation pushdown works at the granularity of individual files
    // therefore we must not split files when either is enabled.
    // Skip header / footer lines are not splittable except for a special case when skip.header.line.count=1
    boolean splittable = isFileSplittable(session) && !isStreamingAggregationEnabled(session) && !s3SelectPushdownEnabled && !partialAggregationsPushedDown && getFooterCount(schema) == 0 && getHeaderCount(schema) <= 1;
    // Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
    if (tableBucketInfo.isPresent()) {
        if (tableBucketInfo.get().isVirtuallyBucketed()) {
            // For virtual bucket, bucket conversion must not be present because there is no physical partition bucket count
            checkState(!bucketConversion.isPresent(), "Virtually bucketed table must not have partitions that are physically bucketed");
            checkState(tableBucketInfo.get().getTableBucketCount() == tableBucketInfo.get().getReadBucketCount(), "Table and read bucket count should be the same for virtual bucket");
            return hiveSplitSource.addToQueue(getVirtuallyBucketedSplits(path, fs, splitFactory, tableBucketInfo.get().getReadBucketCount(), splittable, pathFilter));
        }
        return hiveSplitSource.addToQueue(getBucketedSplits(path, fs, splitFactory, tableBucketInfo.get(), bucketConversion, partitionName, splittable, pathFilter));
    }
    fileIterators.addLast(createInternalHiveSplitIterator(path, fs, splitFactory, splittable, pathFilter, partition.getPartition()));
    return COMPLETED_FUTURE;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) LoadingCache(com.google.common.cache.LoadingCache) ListMultimap(com.google.common.collect.ListMultimap) HiveSessionProperties.isFileSplittable(com.facebook.presto.hive.HiveSessionProperties.isFileSplittable) HiveSessionProperties.isUseListDirectoryCache(com.facebook.presto.hive.HiveSessionProperties.isUseListDirectoryCache) FileStatus(org.apache.hadoop.fs.FileStatus) IntPredicate(java.util.function.IntPredicate) HiveUtil.getHeaderCount(com.facebook.presto.hive.HiveUtil.getHeaderCount) HiveUtil.getInputFormat(com.facebook.presto.hive.HiveUtil.getInputFormat) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) FileSplit(org.apache.hadoop.mapred.FileSplit) CharStreams(com.google.common.io.CharStreams) Configuration(org.apache.hadoop.conf.Configuration) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) HiveMetadata.shouldCreateFilesForMissingBuckets(com.facebook.presto.hive.HiveMetadata.shouldCreateFilesForMissingBuckets) HIVE_INVALID_FILE_NAMES(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_FILE_NAMES) Function(com.google.common.base.Function) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) HiveWriterFactory.getBucketNumber(com.facebook.presto.hive.HiveWriterFactory.getBucketNumber) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveSessionProperties.isStreamingAggregationEnabled(com.facebook.presto.hive.HiveSessionProperties.isStreamingAggregationEnabled) S3SelectPushdown.shouldEnablePushdownForTable(com.facebook.presto.hive.S3SelectPushdown.shouldEnablePushdownForTable) StandardCharsets(java.nio.charset.StandardCharsets) String.format(java.lang.String.format) HiveSessionProperties.getMaxInitialSplitSize(com.facebook.presto.hive.HiveSessionProperties.getMaxInitialSplitSize) FAIL(com.facebook.presto.hive.NestedDirectoryPolicy.FAIL) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ConnectorSession(com.facebook.presto.spi.ConnectorSession) CacheLoader(com.google.common.cache.CacheLoader) List(java.util.List) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) HIDDEN_FILES_PATH_FILTER(org.apache.hadoop.hive.common.FileUtils.HIDDEN_FILES_PATH_FILTER) Optional(java.util.Optional) Math.max(java.lang.Math.max) CacheBuilder(com.google.common.cache.CacheBuilder) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) Table(com.facebook.presto.hive.metastore.Table) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) HiveUtil.getFooterCount(com.facebook.presto.hive.HiveUtil.getFooterCount) PathFilter(org.apache.hadoop.fs.PathFilter) MetastoreUtil.getPartitionLocation(com.facebook.presto.hive.metastore.MetastoreUtil.getPartitionLocation) PrestoException(com.facebook.presto.spi.PrestoException) Deque(java.util.Deque) HiveSessionProperties.getNodeSelectionStrategy(com.facebook.presto.hive.HiveSessionProperties.getNodeSelectionStrategy) OptionalInt(java.util.OptionalInt) Iterators(com.google.common.collect.Iterators) Partition(com.facebook.presto.hive.metastore.Partition) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) ArrayList(java.util.ArrayList) IGNORED(com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED) ImmutableList(com.google.common.collect.ImmutableList) HiveUtil.shouldUseFileSplitsFromInputFormat(com.facebook.presto.hive.HiveUtil.shouldUseFileSplitsFromInputFormat) ConfigurationUtils.toJobConf(com.facebook.presto.hive.util.ConfigurationUtils.toJobConf) Objects.requireNonNull(java.util.Objects.requireNonNull) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) RECURSE(com.facebook.presto.hive.NestedDirectoryPolicy.RECURSE) HIVE_INVALID_BUCKET_FILES(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES) Futures.immediateFuture(com.google.common.util.concurrent.Futures.immediateFuture) Storage(com.facebook.presto.hive.metastore.Storage) Properties(java.util.Properties) Iterator(java.util.Iterator) InternalHiveSplitFactory(com.facebook.presto.hive.util.InternalHiveSplitFactory) HiveColumnHandle.pathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle) HoodieROTablePathFilter(org.apache.hudi.hadoop.HoodieROTablePathFilter) IOException(java.io.IOException) HiveFileIterator(com.facebook.presto.hive.util.HiveFileIterator) InputStreamReader(java.io.InputStreamReader) Domain(com.facebook.presto.common.predicate.Domain) JobConf(org.apache.hadoop.mapred.JobConf) Streams.stream(com.google.common.collect.Streams.stream) InputSplit(org.apache.hadoop.mapred.InputSplit) BufferedReader(java.io.BufferedReader) Maps.fromProperties(com.google.common.collect.Maps.fromProperties) Comparator(java.util.Comparator) HiveBucketing.getVirtualBucketNumber(com.facebook.presto.hive.HiveBucketing.getVirtualBucketNumber) HiveUtil.isHudiParquetInputFormat(com.facebook.presto.hive.HiveUtil.isHudiParquetInputFormat) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) PathFilter(org.apache.hadoop.fs.PathFilter) HoodieROTablePathFilter(org.apache.hudi.hadoop.HoodieROTablePathFilter) Configuration(org.apache.hadoop.conf.Configuration) PrestoException(com.facebook.presto.spi.PrestoException) Properties(java.util.Properties) Maps.fromProperties(com.google.common.collect.Maps.fromProperties) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) ConfigurationUtils.toJobConf(com.facebook.presto.hive.util.ConfigurationUtils.toJobConf) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Path(org.apache.hadoop.fs.Path) InternalHiveSplitFactory(com.facebook.presto.hive.util.InternalHiveSplitFactory) Storage(com.facebook.presto.hive.metastore.Storage) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem)

Example 17 with Storage

use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.

the class TestGlueToPrestoConverter method testPartitionConversionMemoization.

@Test
public void testPartitionConversionMemoization() {
    String fakeS3Location = "s3://some-fake-location";
    testPartition.getStorageDescriptor().setLocation(fakeS3Location);
    // Second partition to convert with equal (but not aliased) values
    Partition partitionTwo = getGlueTestPartition(testPartition.getDatabaseName(), testPartition.getTableName(), new ArrayList<>(testPartition.getValues()));
    // Ensure storage fields are equal but not aliased as well
    partitionTwo.getStorageDescriptor().setColumns(new ArrayList<>(testPartition.getStorageDescriptor().getColumns()));
    partitionTwo.getStorageDescriptor().setBucketColumns(new ArrayList<>(testPartition.getStorageDescriptor().getBucketColumns()));
    partitionTwo.getStorageDescriptor().setLocation("" + fakeS3Location);
    partitionTwo.getStorageDescriptor().setInputFormat("" + testPartition.getStorageDescriptor().getInputFormat());
    partitionTwo.getStorageDescriptor().setOutputFormat("" + testPartition.getStorageDescriptor().getOutputFormat());
    partitionTwo.getStorageDescriptor().setParameters(new HashMap<>(testPartition.getStorageDescriptor().getParameters()));
    GluePartitionConverter converter = new GluePartitionConverter(testDb.getName(), testTbl.getName());
    com.facebook.presto.hive.metastore.Partition prestoPartition = converter.apply(testPartition);
    com.facebook.presto.hive.metastore.Partition prestoPartition2 = converter.apply(partitionTwo);
    assertNotSame(prestoPartition, prestoPartition2);
    assertSame(prestoPartition2.getDatabaseName(), prestoPartition.getDatabaseName());
    assertSame(prestoPartition2.getTableName(), prestoPartition.getTableName());
    assertSame(prestoPartition2.getColumns(), prestoPartition.getColumns());
    assertSame(prestoPartition2.getParameters(), prestoPartition.getParameters());
    assertNotSame(prestoPartition2.getValues(), prestoPartition.getValues());
    Storage storage = prestoPartition.getStorage();
    Storage storage2 = prestoPartition2.getStorage();
    assertSame(storage2.getStorageFormat(), storage.getStorageFormat());
    assertSame(storage2.getBucketProperty(), storage.getBucketProperty());
    assertSame(storage2.getSerdeParameters(), storage.getSerdeParameters());
    assertNotSame(storage2.getLocation(), storage.getLocation());
}
Also used : TestingMetastoreObjects.getGlueTestPartition(com.facebook.presto.hive.metastore.glue.TestingMetastoreObjects.getGlueTestPartition) Partition(com.amazonaws.services.glue.model.Partition) Storage(com.facebook.presto.hive.metastore.Storage) GluePartitionConverter(com.facebook.presto.hive.metastore.glue.converter.GlueToPrestoConverter.GluePartitionConverter) Test(org.testng.annotations.Test)

Example 18 with Storage

use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.

the class TestHiveFileFormats method testPageSourceFactory.

private void testPageSourceFactory(HiveBatchPageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) throws IOException {
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
    List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
    List<Column> tableDataColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty())).collect(toImmutableList());
    List<HiveColumnHandle> columnHandles = getColumnHandles(testColumns);
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(), ImmutableSet.of(sourceFactory), new Configuration(), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(), new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()), TupleDomain.all(), columnHandles, ImmutableMap.of(), partitionKeys, DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"), partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(), TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT, TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testColumns, getTypes(columnHandles), rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Storage(com.facebook.presto.hive.metastore.Storage) Column(com.facebook.presto.hive.metastore.Column)

Example 19 with Storage

use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.

the class TestHiveFileFormats method testCursorProvider.

private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) {
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
    List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
    List<Column> tableDataColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty())).collect(toImmutableList());
    Configuration configuration = new Configuration();
    configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(cursorProvider), ImmutableSet.of(), configuration, session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(), new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()), TupleDomain.all(), getColumnHandles(testColumns), ImmutableMap.of(), partitionKeys, DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"), partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(), TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT, TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
    RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
    checkCursor(cursor, testColumns, rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RecordCursor(com.facebook.presto.spi.RecordCursor) Configuration(org.apache.hadoop.conf.Configuration) LzoCodec(io.airlift.compress.lzo.LzoCodec) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) RecordPageSource(com.facebook.presto.spi.RecordPageSource) Storage(com.facebook.presto.hive.metastore.Storage) Column(com.facebook.presto.hive.metastore.Column)

Aggregations

Storage (com.facebook.presto.hive.metastore.Storage)19 SchemaTableName (com.facebook.presto.spi.SchemaTableName)12 Column (com.facebook.presto.hive.metastore.Column)9 Configuration (org.apache.hadoop.conf.Configuration)9 Test (org.testng.annotations.Test)9 ConnectorSession (com.facebook.presto.spi.ConnectorSession)8 PrestoException (com.facebook.presto.spi.PrestoException)8 List (java.util.List)8 Objects.requireNonNull (java.util.Objects.requireNonNull)8 Optional (java.util.Optional)8 Path (org.apache.hadoop.fs.Path)8 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)7 ImmutableList (com.google.common.collect.ImmutableList)7 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)7 IOException (java.io.IOException)7 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)6 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)6 DateTimeZone (org.joda.time.DateTimeZone)6 CacheConfig (com.facebook.presto.cache.CacheConfig)5 Domain (com.facebook.presto.common.predicate.Domain)5