
Example 11 with TypeManager

Use of com.facebook.presto.spi.type.TypeManager in project presto by prestodb.

From class AbstractTestHiveClient, method setup:

protected final void setup(String databaseName, HiveClientConfig hiveClientConfig, ExtendedHiveMetastore hiveMetastore) {
    HiveConnectorId connectorId = new HiveConnectorId("hive-test");
    setupHive(connectorId.toString(), databaseName, hiveClientConfig.getTimeZone());
    metastoreClient = hiveMetastore;
    // HDFS access: S3-aware configuration plus an environment with no authentication
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationUpdater(hiveClientConfig, new HiveS3Config()));
    hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hiveClientConfig, new NoHdfsAuthentication());
    locationService = new HiveLocationService(hdfsEnvironment);
    // TypeRegistry is the concrete TypeManager threaded through every component below
    TypeManager typeManager = new TypeRegistry();
    JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
    metadataFactory = new HiveMetadataFactory(connectorId, metastoreClient, hdfsEnvironment, new HivePartitionManager(connectorId, TYPE_MANAGER, hiveClientConfig), timeZone, 10, true, true, false, true, HiveStorageFormat.RCBINARY, 1000, typeManager, locationService, new TableParameterCodec(), partitionUpdateCodec, newFixedThreadPool(2), new HiveTypeTranslator(), TEST_SERVER_VERSION);
    transactionManager = new HiveTransactionManager();
    // Split, page sink, and page source providers mirror the production wiring
    splitManager = new HiveSplitManager(connectorId, transactionHandle -> ((HiveMetadata) transactionManager.get(transactionHandle)).getMetastore(), new NamenodeStats(), hdfsEnvironment, new HadoopDirectoryLister(), newDirectExecutorService(), new HiveCoercionPolicy(typeManager), 100, hiveClientConfig.getMinPartitionBatchSize(), hiveClientConfig.getMaxPartitionBatchSize(), hiveClientConfig.getMaxInitialSplits(), false);
    pageSinkProvider = new HivePageSinkProvider(getDefaultHiveFileWriterFactories(hiveClientConfig), hdfsEnvironment, metastoreClient, new GroupByHashPageIndexerFactory(JOIN_COMPILER), typeManager, new HiveClientConfig(), locationService, partitionUpdateCodec);
    pageSourceProvider = new HivePageSourceProvider(hiveClientConfig, hdfsEnvironment, getDefaultHiveRecordCursorProvider(hiveClientConfig), getDefaultHiveDataStreamFactories(hiveClientConfig), TYPE_MANAGER);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) TypeManager(com.facebook.presto.spi.type.TypeManager) Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) FileSystem(org.apache.hadoop.fs.FileSystem) TypeRegistry(com.facebook.presto.type.TypeRegistry) SqlDate(com.facebook.presto.spi.type.SqlDate) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FileStatus(org.apache.hadoop.fs.FileStatus) ROLLBACK_AFTER_BEGIN_INSERT(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) BIGINT(com.facebook.presto.spi.type.BigintType.BIGINT) Sets.difference(com.google.common.collect.Sets.difference) BOOLEAN(com.facebook.presto.spi.type.BooleanType.BOOLEAN) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) ROLLBACK_AFTER_DELETE(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) Map(java.util.Map) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) HIVE_LONG(com.facebook.presto.hive.HiveType.HIVE_LONG) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) StandardTypes(com.facebook.presto.spi.type.StandardTypes) HiveWriteUtils.createDirectory(com.facebook.presto.hive.HiveWriteUtils.createDirectory) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) TINYINT(com.facebook.presto.spi.type.TinyintType.TINYINT) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) PrincipalPrivileges(com.facebook.presto.hive.metastore.PrincipalPrivileges) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ROW(com.facebook.presto.spi.type.StandardTypes.ROW) Domain(com.facebook.presto.spi.predicate.Domain) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) BridgingHiveMetastore(com.facebook.presto.hive.metastore.BridgingHiveMetastore) HivePrivilegeInfo(com.facebook.presto.hive.metastore.HivePrivilegeInfo) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Iterables(com.google.common.collect.Iterables) DOUBLE(com.facebook.presto.spi.type.DoubleType.DOUBLE) Table(com.facebook.presto.hive.metastore.Table) Slice(io.airlift.slice.Slice) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) MoreExecutors.newDirectExecutorService(com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService) HiveUtil.columnExtraInfo(com.facebook.presto.hive.HiveUtil.columnExtraInfo) UTC_KEY(com.facebook.presto.spi.type.TimeZoneKey.UTC_KEY) MapType(com.facebook.presto.type.MapType) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) ROLLBACK_AFTER_SINK_FINISH(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) ARRAY(com.facebook.presto.spi.type.StandardTypes.ARRAY) 
Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Type(com.facebook.presto.spi.type.Type) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) TIMESTAMP(com.facebook.presto.spi.type.TimestampType.TIMESTAMP) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) TestException(org.testng.TestException) AfterClass(org.testng.annotations.AfterClass) HYPER_LOG_LOG(com.facebook.presto.spi.type.HyperLogLogType.HYPER_LOG_LOG) Constraint(com.facebook.presto.spi.Constraint) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Range(com.facebook.presto.spi.predicate.Range) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) UTC(org.joda.time.DateTimeZone.UTC) HostAndPort(com.google.common.net.HostAndPort) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) HivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege) SchemaTablePrefix(com.facebook.presto.spi.SchemaTablePrefix) ColumnHandle(com.facebook.presto.spi.ColumnHandle) SqlVarbinary(com.facebook.presto.spi.type.SqlVarbinary) ROLLBACK_RIGHT_AWAY(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) TableType(org.apache.hadoop.hive.metastore.TableType) HiveMetadata.convertToPredicate(com.facebook.presto.hive.HiveMetadata.convertToPredicate) TypeSignature.parseTypeSignature(com.facebook.presto.spi.type.TypeSignature.parseTypeSignature) ThriftHiveMetastore(com.facebook.presto.hive.metastore.ThriftHiveMetastore) ConnectorViewDefinition(com.facebook.presto.spi.ConnectorViewDefinition) HiveTestUtils.getDefaultHiveDataStreamFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveDataStreamFactories) ViewNotFoundException(com.facebook.presto.spi.ViewNotFoundException) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) HiveTestUtils.getDefaultHiveFileWriterFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) ROLLBACK_AFTER_FINISH_INSERT(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) HiveType.toHiveType(com.facebook.presto.hive.HiveType.toHiveType) ParquetHiveRecordCursor(com.facebook.presto.hive.parquet.ParquetHiveRecordCursor) Duration(io.airlift.units.Duration) MaterializedResult.materializeSourceDataStream(com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream) SqlTimestamp(com.facebook.presto.spi.type.SqlTimestamp) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) HIVE_METASTORE_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR) Iterables.concat(com.google.common.collect.Iterables.concat) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) BUCKETED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) TypeSignatureParameter(com.facebook.presto.spi.type.TypeSignatureParameter) Path(org.apache.hadoop.fs.Path) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) 
NullableValue(com.facebook.presto.spi.predicate.NullableValue) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) String.format(java.lang.String.format) COMMIT(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.COMMIT) Preconditions.checkState(com.google.common.base.Preconditions.checkState) STORAGE_FORMAT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) HIVE_STRING(com.facebook.presto.hive.HiveType.HIVE_STRING) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER) Optional(java.util.Optional) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) JsonCodec(io.airlift.json.JsonCodec) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) Assert.assertNull(org.testng.Assert.assertNull) Logger(io.airlift.log.Logger) Column(com.facebook.presto.hive.metastore.Column) ArrayType(com.facebook.presto.type.ArrayType) RcFilePageSource(com.facebook.presto.hive.rcfile.RcFilePageSource) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) HadoopFileStatus(com.facebook.presto.hadoop.HadoopFileStatus) Assert.assertEquals(org.testng.Assert.assertEquals) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) Partition(com.facebook.presto.hive.metastore.Partition) MAP(com.facebook.presto.spi.type.StandardTypes.MAP) HashSet(java.util.HashSet) ROLLBACK_AFTER_APPEND_PAGE(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) HIVE_INT(com.facebook.presto.hive.HiveType.HIVE_INT) OrcPageSource(com.facebook.presto.hive.orc.OrcPageSource) ImmutableList(com.google.common.collect.ImmutableList) PARTITIONED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) ValueSet(com.facebook.presto.spi.predicate.ValueSet) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) NamedTypeSignature(com.facebook.presto.spi.type.NamedTypeSignature) Objects.requireNonNull(java.util.Objects.requireNonNull) Math.toIntExact(java.lang.Math.toIntExact) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) 
VARBINARY(com.facebook.presto.spi.type.VarbinaryType.VARBINARY) ExecutorService(java.util.concurrent.ExecutorService) ConnectorInsertTableHandle(com.facebook.presto.spi.ConnectorInsertTableHandle) CachingHiveMetastore(com.facebook.presto.hive.metastore.CachingHiveMetastore) UTF_8(java.nio.charset.StandardCharsets.UTF_8) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) Assert.fail(org.testng.Assert.fail) DateTime(org.joda.time.DateTime) SMALLINT(com.facebook.presto.spi.type.SmallintType.SMALLINT) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) MaterializedResult(com.facebook.presto.testing.MaterializedResult) Assertions.assertEqualsIgnoreOrder(io.airlift.testing.Assertions.assertEqualsIgnoreOrder) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) DATE(com.facebook.presto.spi.type.DateType.DATE) REAL(com.facebook.presto.spi.type.RealType.REAL) BUCKET_COUNT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) MaterializedRow(com.facebook.presto.testing.MaterializedRow) Assert.assertTrue(org.testng.Assert.assertTrue) ImmutableCollectors(com.facebook.presto.util.ImmutableCollectors) GroupByHashPageIndexerFactory(com.facebook.presto.GroupByHashPageIndexerFactory) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) JoinCompiler(com.facebook.presto.sql.gen.JoinCompiler) HiveTestUtils.getDefaultHiveRecordCursorProvider(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) TypeRegistry(com.facebook.presto.type.TypeRegistry) TypeManager(com.facebook.presto.spi.type.TypeManager) GroupByHashPageIndexerFactory(com.facebook.presto.GroupByHashPageIndexerFactory)
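
The recurring idiom above is constructing one TypeRegistry and handing it to every component that declares a TypeManager dependency. Here is a minimal sketch of what that object does, assuming the 0.16x-era SPI these imports come from; the signature string resolved below is an arbitrary illustration, not taken from the test:

import static com.facebook.presto.spi.type.TypeSignature.parseTypeSignature;

import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.TypeManager;
import com.facebook.presto.type.TypeRegistry;

public class TypeManagerSketch
{
    public static void main(String[] args)
    {
        // The tests construct the engine's concrete TypeManager directly
        // instead of going through the server bootstrap.
        TypeManager typeManager = new TypeRegistry();

        // Resolve a parametric type from its textual signature.
        Type arrayOfBigint = typeManager.getType(parseTypeSignature("array(bigint)"));
        System.out.println(arrayOfBigint.getDisplayName());
    }
}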

Example 12 with TypeManager

Use of com.facebook.presto.spi.type.TypeManager in project presto by prestodb.

From class ParquetHiveRecordCursor, method createParquetRecordReader:

private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(sessionUser, () -> ParquetFileReader.readFooter(configuration, path, NO_FILTER));
        List<BlockMetaData> blocks = parquetMetadata.getBlocks();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileSchema);
        List<parquet.schema.Type> fields = columns.stream().filter(column -> column.getColumnType() == REGULAR).map(column -> getParquetType(column, fileSchema, useParquetColumnNames)).filter(Objects::nonNull).collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        LongArrayList offsets = new LongArrayList(blocks.size());
        // A row group belongs to this split iff its first data page offset
        // falls inside [start, start + length)
        for (BlockMetaData block : blocks) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                if (predicatePushdownEnabled) {
                    // Skip row groups whose column statistics cannot satisfy the predicate
                    ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
                    if (predicateMatches(parquetPredicate, block, dataSource, requestedSchema, effectivePredicate)) {
                        offsets.add(block.getStartingPos());
                    }
                } else {
                    offsets.add(block.getStartingPos());
                }
            }
        }
        ParquetInputSplit split = new ParquetInputSplit(path, start, start + length, length, null, offsets.toLongArray());
        TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
        return hdfsEnvironment.doAs(sessionUser, () -> {
            ParquetRecordReader<FakeParquetRecord> realReader = new PrestoParquetRecordReader(readSupport);
            realReader.initialize(split, taskContext);
            return realReader;
        });
    } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, PrestoException.class);
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        // BlockMissingException is matched by simple class name to avoid a
        // compile-time dependency on the HDFS client classes
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    } finally {
        if (dataSource != null) {
            try {
                dataSource.close();
            } catch (IOException ignored) {
            }
        }
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Arrays(java.util.Arrays) Block(com.facebook.presto.spi.block.Block) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_CURSOR_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Slices.wrappedBuffer(io.airlift.slice.Slices.wrappedBuffer) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DecimalType(com.facebook.presto.spi.type.DecimalType) DecimalMetadata(parquet.schema.DecimalMetadata) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BigInteger(java.math.BigInteger) PrimitiveType(parquet.schema.PrimitiveType) MAP_KEY_VALUE(parquet.schema.OriginalType.MAP_KEY_VALUE) Decimals(com.facebook.presto.spi.type.Decimals) ReadSupport(parquet.hadoop.api.ReadSupport) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder) Math.min(java.lang.Math.min) Chars.trimSpacesAndTruncateToLength(com.facebook.presto.spi.type.Chars.trimSpacesAndTruncateToLength) Binary(parquet.io.api.Binary) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) ROW(com.facebook.presto.spi.type.StandardTypes.ROW) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) NO_FILTER(parquet.format.converter.ParquetMetadataConverter.NO_FILTER) Optional(java.util.Optional) Math.max(java.lang.Math.max) Varchars.truncateToLength(com.facebook.presto.spi.type.Varchars.truncateToLength) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RecordMaterializer(parquet.io.api.RecordMaterializer) Converter(parquet.io.api.Converter) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) GroupConverter(parquet.io.api.GroupConverter) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Slice(io.airlift.slice.Slice) ParquetFileReader(parquet.hadoop.ParquetFileReader) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ParquetRecordReader(parquet.hadoop.ParquetRecordReader) PrestoException(com.facebook.presto.spi.PrestoException) PrimitiveConverter(parquet.io.api.PrimitiveConverter) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) MAP(com.facebook.presto.spi.type.StandardTypes.MAP) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) DecimalUtils(com.facebook.presto.hive.util.DecimalUtils) ARRAY(com.facebook.presto.spi.type.StandardTypes.ARRAY) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) HiveUtil.closeWithSuppression(com.facebook.presto.hive.HiveUtil.closeWithSuppression) Type(com.facebook.presto.spi.type.Type) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) Objects.requireNonNull(java.util.Objects.requireNonNull) 
DECIMAL(parquet.schema.OriginalType.DECIMAL) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus) Dictionary(parquet.column.Dictionary) TIMESTAMP(com.facebook.presto.spi.type.TimestampType.TIMESTAMP) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) MessageType(parquet.schema.MessageType) Properties(java.util.Properties) ParquetPredicateUtils.predicateMatches(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) ContextUtil(parquet.hadoop.util.ContextUtil) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FileMetaData(parquet.hadoop.metadata.FileMetaData) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) Collectors.toList(java.util.stream.Collectors.toList) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) PrestoException(com.facebook.presto.spi.PrestoException) FileSystem(org.apache.hadoop.fs.FileSystem) FileMetaData(parquet.hadoop.metadata.FileMetaData) MessageType(parquet.schema.MessageType) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) DecimalType(com.facebook.presto.spi.type.DecimalType) PrimitiveType(parquet.schema.PrimitiveType) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Type(com.facebook.presto.spi.type.Type) MessageType(parquet.schema.MessageType) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit)
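
The interesting decision in createParquetRecordReader is how a byte-range split claims row groups: a row group belongs to the split exactly when its first data page offset lands in [start, start + length), which guarantees each row group is read by exactly one split even when split boundaries cut through the middle of a row group. A hypothetical stand-alone distillation of that test, with the Parquet metadata classes replaced by plain arrays:

import java.util.ArrayList;
import java.util.List;

public class RowGroupSelectionSketch
{
    // firstDataPageOffsets[i] and startingPositions[i] stand in for
    // BlockMetaData.getColumns().get(0).getFirstDataPageOffset() and
    // BlockMetaData.getStartingPos() in the real code.
    static List<Long> selectRowGroupOffsets(long[] firstDataPageOffsets, long[] startingPositions, long start, long length)
    {
        List<Long> offsets = new ArrayList<>();
        for (int i = 0; i < firstDataPageOffsets.length; i++) {
            long firstDataPage = firstDataPageOffsets[i];
            // Same predicate as the loop in the example above
            if (firstDataPage >= start && firstDataPage < start + length) {
                offsets.add(startingPositions[i]);
            }
        }
        return offsets;
    }

    public static void main(String[] args)
    {
        // Three row groups; a split covering bytes [100, 300) claims only the middle one.
        long[] firstPages = {50, 150, 350};
        long[] starts = {4, 120, 320};
        System.out.println(selectRowGroupOffsets(firstPages, starts, 100, 200)); // prints [120]
    }
}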

Example 13 with TypeManager

Use of com.facebook.presto.spi.type.TypeManager in project presto by prestodb.

From class ParquetPageSourceFactory, method createParquetPageSource:

public static ParquetPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
    AggregatedMemoryContext systemMemoryContext = new AggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(fileSystem, path);
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        List<parquet.schema.Type> fields = columns.stream().filter(column -> column.getColumnType() == REGULAR).map(column -> getParquetType(column, fileSchema, useParquetColumnNames)).filter(Objects::nonNull).collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        List<BlockMetaData> blocks = new ArrayList<>();
        // Same split-to-row-group assignment as in the record cursor above
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                blocks.add(block);
            }
        }
        if (predicatePushdownEnabled) {
            // Drop row groups whose column statistics cannot satisfy the predicate
            ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
            final ParquetDataSource finalDataSource = dataSource;
            blocks = blocks.stream().filter(block -> predicateMatches(parquetPredicate, block, finalDataSource, requestedSchema, effectivePredicate)).collect(toList());
        }
        ParquetReader parquetReader = new ParquetReader(fileSchema, requestedSchema, blocks, dataSource, typeManager, systemMemoryContext);
        return new ParquetPageSource(parquetReader, dataSource, fileSchema, requestedSchema, length, schema, columns, effectivePredicate, typeManager, useParquetColumnNames, systemMemoryContext);
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.isParquetOptimizedReaderEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedReaderEnabled) ArrayList(java.util.ArrayList) ParquetReader(com.facebook.presto.hive.parquet.reader.ParquetReader) Inject(javax.inject.Inject) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) Configuration(org.apache.hadoop.conf.Configuration) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) ParquetMetadataReader(com.facebook.presto.hive.parquet.reader.ParquetMetadataReader) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) HiveSessionProperties.isParquetPredicatePushdownEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetPredicatePushdownEnabled) ImmutableSet(com.google.common.collect.ImmutableSet) MessageType(parquet.schema.MessageType) Properties(java.util.Properties) ParquetPredicateUtils.predicateMatches(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches) Set(java.util.Set) IOException(java.io.IOException) AggregatedMemoryContext(com.facebook.presto.hive.parquet.memory.AggregatedMemoryContext) FileMetaData(parquet.hadoop.metadata.FileMetaData) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) HivePageSourceFactory(com.facebook.presto.hive.HivePageSourceFactory) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HiveUtil.getDeserializerClassName(com.facebook.presto.hive.HiveUtil.getDeserializerClassName) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) ArrayList(java.util.ArrayList) ParquetReader(com.facebook.presto.hive.parquet.reader.ParquetReader) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) AggregatedMemoryContext(com.facebook.presto.hive.parquet.memory.AggregatedMemoryContext) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) MessageType(parquet.schema.MessageType) FileSystem(org.apache.hadoop.fs.FileSystem) 
FileMetaData(parquet.hadoop.metadata.FileMetaData) MessageType(parquet.schema.MessageType) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate)
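
Both Parquet entry points translate failures the same way: a PrestoException is rethrown as-is, HDFS's BlockMissingException is recognized by simple class name (avoiding a compile-time dependency on the HDFS client) and mapped to HIVE_MISSING_DATA, and everything else becomes HIVE_CANNOT_OPEN_SPLIT. A hypothetical distillation of that policy, with a local enum standing in for Presto's HiveErrorCode:

import static java.lang.String.format;

public class SplitErrorSketch
{
    // Stand-ins for the error codes imported from HiveErrorCode above
    enum ErrorCode { HIVE_MISSING_DATA, HIVE_CANNOT_OPEN_SPLIT }

    static RuntimeException translate(Exception e, String path, long start, long length)
    {
        // Message format copied from the examples above
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        // Match by simple name so this code never links against the HDFS client classes
        ErrorCode code = e.getClass().getSimpleName().equals("BlockMissingException")
                ? ErrorCode.HIVE_MISSING_DATA
                : ErrorCode.HIVE_CANNOT_OPEN_SPLIT;
        return new RuntimeException(code + ": " + message, e);
    }

    public static void main(String[] args)
    {
        Exception cause = new java.io.IOException("checksum error");
        System.out.println(translate(cause, "/warehouse/t/file.parquet", 0, 1024).getMessage());
    }
}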

Example 14 with TypeManager

Use of com.facebook.presto.spi.type.TypeManager in project presto by prestodb.

From class TestAnalyzer, method setup:

@BeforeMethod(alwaysRun = true)
public void setup() throws Exception {
    TypeManager typeManager = new TypeRegistry();
    CatalogManager catalogManager = new CatalogManager();
    transactionManager = createTestTransactionManager(catalogManager);
    accessControl = new AccessControlManager(transactionManager);
    MetadataManager metadata = new MetadataManager(new FeaturesConfig(), typeManager, new BlockEncodingManager(typeManager), new SessionPropertyManager(), new SchemaPropertyManager(), new TablePropertyManager(), transactionManager);
    metadata.getFunctionRegistry().addFunctions(ImmutableList.of(APPLY_FUNCTION));
    catalogManager.registerCatalog(createTestingCatalog(TPCH_CATALOG, TPCH_CONNECTOR_ID));
    catalogManager.registerCatalog(createTestingCatalog(SECOND_CATALOG, SECOND_CONNECTOR_ID));
    catalogManager.registerCatalog(createTestingCatalog(THIRD_CATALOG, THIRD_CONNECTOR_ID));
    SchemaTableName table1 = new SchemaTableName("s1", "t1");
    inSetupTransaction(session -> metadata.createTable(session, TPCH_CATALOG, new ConnectorTableMetadata(table1, ImmutableList.of(new ColumnMetadata("a", BIGINT), new ColumnMetadata("b", BIGINT), new ColumnMetadata("c", BIGINT), new ColumnMetadata("d", BIGINT)))));
    SchemaTableName table2 = new SchemaTableName("s1", "t2");
    inSetupTransaction(session -> metadata.createTable(session, TPCH_CATALOG, new ConnectorTableMetadata(table2, ImmutableList.of(new ColumnMetadata("a", BIGINT), new ColumnMetadata("b", BIGINT)))));
    SchemaTableName table3 = new SchemaTableName("s1", "t3");
    inSetupTransaction(session -> metadata.createTable(session, TPCH_CATALOG, new ConnectorTableMetadata(table3, ImmutableList.of(new ColumnMetadata("a", BIGINT), new ColumnMetadata("b", BIGINT), new ColumnMetadata("x", BIGINT, null, true)))));
    // table in different catalog
    SchemaTableName table4 = new SchemaTableName("s2", "t4");
    inSetupTransaction(session -> metadata.createTable(session, SECOND_CATALOG, new ConnectorTableMetadata(table4, ImmutableList.of(new ColumnMetadata("a", BIGINT)))));
    // table with a hidden column
    SchemaTableName table5 = new SchemaTableName("s1", "t5");
    inSetupTransaction(session -> metadata.createTable(session, TPCH_CATALOG, new ConnectorTableMetadata(table5, ImmutableList.of(new ColumnMetadata("a", BIGINT), new ColumnMetadata("b", BIGINT, null, true)))));
    // table with a varchar column
    SchemaTableName table6 = new SchemaTableName("s1", "t6");
    inSetupTransaction(session -> metadata.createTable(session, TPCH_CATALOG, new ConnectorTableMetadata(table6, ImmutableList.of(new ColumnMetadata("a", BIGINT), new ColumnMetadata("b", VARCHAR), new ColumnMetadata("c", BIGINT), new ColumnMetadata("d", BIGINT)))));
    // table with bigint, double, array of bigints and array of doubles column
    SchemaTableName table7 = new SchemaTableName("s1", "t7");
    inSetupTransaction(session -> metadata.createTable(session, TPCH_CATALOG, new ConnectorTableMetadata(table7, ImmutableList.of(new ColumnMetadata("a", BIGINT), new ColumnMetadata("b", DOUBLE), new ColumnMetadata("c", new ArrayType(BIGINT)), new ColumnMetadata("d", new ArrayType(DOUBLE))))));
    // valid view referencing table in same schema
    String viewData1 = JsonCodec.jsonCodec(ViewDefinition.class).toJson(new ViewDefinition("select a from t1", Optional.of(TPCH_CATALOG), Optional.of("s1"), ImmutableList.of(new ViewColumn("a", BIGINT)), Optional.of("user")));
    inSetupTransaction(session -> metadata.createView(session, new QualifiedObjectName(TPCH_CATALOG, "s1", "v1"), viewData1, false));
    // stale view (different column type)
    String viewData2 = JsonCodec.jsonCodec(ViewDefinition.class).toJson(new ViewDefinition("select a from t1", Optional.of(TPCH_CATALOG), Optional.of("s1"), ImmutableList.of(new ViewColumn("a", VARCHAR)), Optional.of("user")));
    inSetupTransaction(session -> metadata.createView(session, new QualifiedObjectName(TPCH_CATALOG, "s1", "v2"), viewData2, false));
    // view referencing table in different schema from itself and session
    String viewData3 = JsonCodec.jsonCodec(ViewDefinition.class).toJson(new ViewDefinition("select a from t4", Optional.of(SECOND_CATALOG), Optional.of("s2"), ImmutableList.of(new ViewColumn("a", BIGINT)), Optional.of("owner")));
    inSetupTransaction(session -> metadata.createView(session, new QualifiedObjectName(THIRD_CATALOG, "s3", "v3"), viewData3, false));
    // valid view with uppercase column name
    String viewData4 = JsonCodec.jsonCodec(ViewDefinition.class).toJson(new ViewDefinition("select A from t1", Optional.of("tpch"), Optional.of("s1"), ImmutableList.of(new ViewColumn("a", BIGINT)), Optional.of("user")));
    inSetupTransaction(session -> metadata.createView(session, new QualifiedObjectName("tpch", "s1", "v4"), viewData4, false));
    // recursive view referencing to itself
    String viewData5 = JsonCodec.jsonCodec(ViewDefinition.class).toJson(new ViewDefinition("select * from v5", Optional.of(TPCH_CATALOG), Optional.of("s1"), ImmutableList.of(new ViewColumn("a", BIGINT)), Optional.of("user")));
    inSetupTransaction(session -> metadata.createView(session, new QualifiedObjectName(TPCH_CATALOG, "s1", "v5"), viewData5, false));
    this.metadata = metadata;
}
Also used : AccessControlManager(com.facebook.presto.security.AccessControlManager) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) ViewColumn(com.facebook.presto.metadata.ViewDefinition.ViewColumn) ViewDefinition(com.facebook.presto.metadata.ViewDefinition) TypeRegistry(com.facebook.presto.type.TypeRegistry) SchemaTableName(com.facebook.presto.spi.SchemaTableName) CatalogManager(com.facebook.presto.metadata.CatalogManager) QualifiedObjectName(com.facebook.presto.metadata.QualifiedObjectName) ArrayType(com.facebook.presto.type.ArrayType) MetadataManager(com.facebook.presto.metadata.MetadataManager) BlockEncodingManager(com.facebook.presto.block.BlockEncodingManager) SessionPropertyManager(com.facebook.presto.metadata.SessionPropertyManager) TypeManager(com.facebook.presto.spi.type.TypeManager) TablePropertyManager(com.facebook.presto.metadata.TablePropertyManager) SchemaPropertyManager(com.facebook.presto.metadata.SchemaPropertyManager) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) BeforeMethod(org.testng.annotations.BeforeMethod)
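
Each view in the test is stored as the JSON serialization of a ViewDefinition, produced with airlift's JsonCodec; the stored SQL is later re-analyzed against the stored column list, which is what the stale-view case (v2, whose stored column type no longer matches) exercises. A minimal sketch of that round trip, where ViewSketch is a hypothetical simplified stand-in for ViewDefinition (the real class carries typed columns, an owner, and Optional catalog/schema fields):

import io.airlift.json.JsonCodec;

public class ViewCodecSketch
{
    // Hypothetical view payload; Jackson binds the public fields by default.
    public static class ViewSketch
    {
        public String originalSql;
        public String catalog;
        public String schema;

        public ViewSketch() {}

        public ViewSketch(String originalSql, String catalog, String schema)
        {
            this.originalSql = originalSql;
            this.catalog = catalog;
            this.schema = schema;
        }
    }

    public static void main(String[] args)
    {
        JsonCodec<ViewSketch> codec = JsonCodec.jsonCodec(ViewSketch.class);
        String json = codec.toJson(new ViewSketch("select a from t1", "tpch", "s1"));
        ViewSketch roundTrip = codec.fromJson(json);
        System.out.println(json);
        System.out.println(roundTrip.originalSql);
    }
}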

Example 15 with TypeManager

Use of com.facebook.presto.spi.type.TypeManager in project presto by prestodb.

From class AccumuloModule, method configure:

@Override
public void configure(Binder binder) {
    // Create a JUL appender and attach it to the Log4J root logger
    JulAppender appender = new JulAppender();
    appender.setLayout(new PatternLayout("%d %-5p %c - %m%n"));
    appender.setThreshold(Level.INFO);
    appender.activateOptions();
    org.apache.log4j.Logger.getRootLogger().addAppender(appender);
    binder.bind(TypeManager.class).toInstance(typeManager);
    binder.bind(AccumuloConnector.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloConnectorId.class).toInstance(new AccumuloConnectorId(connectorId));
    binder.bind(AccumuloMetadata.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloMetadataFactory.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloClient.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloSplitManager.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloRecordSetProvider.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloPageSinkProvider.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloHandleResolver.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloSessionProperties.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloTableProperties.class).in(Scopes.SINGLETON);
    binder.bind(ZooKeeperMetadataManager.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloTableManager.class).in(Scopes.SINGLETON);
    binder.bind(Connector.class).toProvider(ConnectorProvider.class);
    configBinder(binder).bindConfig(AccumuloConfig.class);
    jsonBinder(binder).addDeserializerBinding(Type.class).to(TypeDeserializer.class);
    jsonCodecBinder(binder).bindMapJsonCodec(String.class, JsonCodec.listJsonCodec(AccumuloTable.class));
}
Also used : Connector(org.apache.accumulo.core.client.Connector) PatternLayout(org.apache.log4j.PatternLayout) AccumuloTableProperties(com.facebook.presto.accumulo.conf.AccumuloTableProperties) AccumuloTable(com.facebook.presto.accumulo.metadata.AccumuloTable) AccumuloPageSinkProvider(com.facebook.presto.accumulo.io.AccumuloPageSinkProvider) Type(com.facebook.presto.spi.type.Type) AccumuloSessionProperties(com.facebook.presto.accumulo.conf.AccumuloSessionProperties) ZooKeeperMetadataManager(com.facebook.presto.accumulo.metadata.ZooKeeperMetadataManager) TypeManager(com.facebook.presto.spi.type.TypeManager) JulAppender(org.apache.log4j.JulAppender) AccumuloRecordSetProvider(com.facebook.presto.accumulo.io.AccumuloRecordSetProvider)
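
Unlike the Hive tests, the Accumulo connector never constructs its own TypeManager; the engine supplies one at connector-creation time and the module republishes it with bind(...).toInstance(...), making it injectable throughout the connector's object graph. A minimal sketch of that binding pattern, assuming Guice on the classpath (the module below is hypothetical, not Accumulo's):

import com.facebook.presto.spi.type.TypeManager;
import com.facebook.presto.type.TypeRegistry;
import com.google.inject.Binder;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Module;

public class TypeManagerBindingSketch
{
    public static Module moduleFor(TypeManager typeManager)
    {
        // Republish the engine-provided instance, as AccumuloModule does above
        return (Binder binder) -> binder.bind(TypeManager.class).toInstance(typeManager);
    }

    public static void main(String[] args)
    {
        // In production the engine supplies this; the sketch builds one directly.
        TypeManager typeManager = new TypeRegistry();
        Injector injector = Guice.createInjector(moduleFor(typeManager));
        System.out.println(injector.getInstance(TypeManager.class) == typeManager); // true
    }
}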

Aggregations

TypeManager (com.facebook.presto.spi.type.TypeManager): 15
Type (com.facebook.presto.spi.type.Type): 10
List (java.util.List): 10
ImmutableList (com.google.common.collect.ImmutableList): 8
Objects.requireNonNull (java.util.Objects.requireNonNull): 8
Optional (java.util.Optional): 7
Collectors.toList (java.util.stream.Collectors.toList): 7
Path (org.apache.hadoop.fs.Path): 7
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 6
String.format (java.lang.String.format): 6
REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR): 5
ConnectorSession (com.facebook.presto.spi.ConnectorSession): 5
PrestoException (com.facebook.presto.spi.PrestoException): 5
TupleDomain (com.facebook.presto.spi.predicate.TupleDomain): 5
ImmutableSet (com.google.common.collect.ImmutableSet): 5
IOException (java.io.IOException): 5
Properties (java.util.Properties): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 5
DateTimeZone (org.joda.time.DateTimeZone): 5
TypeSignature.parseTypeSignature (com.facebook.presto.spi.type.TypeSignature.parseTypeSignature): 4