Search in sources :

Example 16 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class OrcFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    if (!HiveSessionProperties.isOrcOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    OrcEncoding orcEncoding;
    if (OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        orcEncoding = ORC;
    } else if (com.facebook.hive.orc.OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        orcEncoding = DWRF;
    } else {
        return Optional.empty();
    }
    CompressionKind compression = getCompression(schema, configuration, orcEncoding);
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        DataSink dataSink = createDataSink(session, fileSystem, path);
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSystem.getFileStatus(path).getLen(), getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false, fileSystem.open(path), readStats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        Optional<DwrfWriterEncryption> dwrfWriterEncryption = createDwrfEncryption(encryptionInformation, fileColumnNames, fileColumnTypes);
        return Optional.of(new OrcFileWriter(dataSink, rollbackAction, orcEncoding, fileColumnNames, fileColumnTypes, compression, orcFileWriterConfig.toOrcWriterOptionsBuilder().withFlushPolicy(DefaultOrcWriterFlushPolicy.builder().withStripeMinSize(getOrcOptimizedWriterMinStripeSize(session)).withStripeMaxSize(getOrcOptimizedWriterMaxStripeSize(session)).withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session)).build()).withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session)).withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)).withIgnoreDictionaryRowGroupSizes(isExecutionBasedMemoryAccountingEnabled(session)).withDwrfStripeCacheEnabled(isDwrfWriterStripeCacheEnabled(session)).withDwrfStripeCacheMaxSize(getDwrfWriterStripeCacheeMaxSize(session)).build(), fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(MetastoreUtil.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), hiveStorageTimeZone, validationInputFactory, getOrcOptimizedWriterValidateMode(session), stats, dwrfEncryptionProvider, dwrfWriterEncryption));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating " + orcEncoding + " file. " + e.getMessage(), e);
    }
}
Also used : HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSessionProperties.getDwrfWriterStripeCacheeMaxSize(com.facebook.presto.hive.HiveSessionProperties.getDwrfWriterStripeCacheeMaxSize) HIVE_WRITE_VALIDATION_FAILED(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED) DataSink(com.facebook.presto.common.io.DataSink) HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory) OrcConf(org.apache.orc.OrcConf) HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMinStripeSize) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getOrcOptimizedWriterValidateMode(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterValidateMode) OrcDataSource(com.facebook.presto.orc.OrcDataSource) Splitter(com.google.common.base.Splitter) ENGLISH(java.util.Locale.ENGLISH) WriterEncryptionGroup(com.facebook.presto.orc.WriterEncryptionGroup) CRYPTO_SERVICE(com.facebook.presto.orc.metadata.KeyProvider.CRYPTO_SERVICE) HIVE_UNSUPPORTED_FORMAT(com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) KeyProvider(com.facebook.presto.orc.metadata.KeyProvider) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveSessionProperties.isDwrfWriterStripeCacheEnabled(com.facebook.presto.hive.HiveSessionProperties.isDwrfWriterStripeCacheEnabled) HiveSessionProperties.isExecutionBasedMemoryAccountingEnabled(com.facebook.presto.hive.HiveSessionProperties.isExecutionBasedMemoryAccountingEnabled) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) ImmutableListMultimap(com.google.common.collect.ImmutableListMultimap) Optional(java.util.Optional) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) IntStream(java.util.stream.IntStream) HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize) Slice(io.airlift.slice.Slice) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Flatten(org.weakref.jmx.Flatten) Callable(java.util.concurrent.Callable) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) DwrfWriterEncryption(com.facebook.presto.orc.DwrfWriterEncryption) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) PrestoException(com.facebook.presto.spi.PrestoException) Supplier(java.util.function.Supplier) UNKNOWN(com.facebook.presto.orc.metadata.KeyProvider.UNKNOWN) Inject(javax.inject.Inject) MetastoreUtil(com.facebook.presto.hive.metastore.MetastoreUtil) Managed(org.weakref.jmx.Managed) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcWriterStats(com.facebook.presto.orc.OrcWriterStats) Type(com.facebook.presto.common.type.Type) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) OrcEncoding(com.facebook.presto.orc.OrcEncoding) Properties(java.util.Properties) DefaultOrcWriterFlushPolicy(com.facebook.presto.orc.DefaultOrcWriterFlushPolicy) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) IOException(java.io.IOException) HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) HiveSessionProperties.getOrcStringStatisticsLimit(com.facebook.presto.hive.HiveSessionProperties.getOrcStringStatisticsLimit) HIVE_WRITER_OPEN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) DataSink(com.facebook.presto.common.io.DataSink) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) PrestoException(com.facebook.presto.spi.PrestoException) OrcEncoding(com.facebook.presto.orc.OrcEncoding) IOException(java.io.IOException) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) FileSystem(org.apache.hadoop.fs.FileSystem) DwrfWriterEncryption(com.facebook.presto.orc.DwrfWriterEncryption) Supplier(java.util.function.Supplier)

Example 17 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class AbstractTestHiveClient method doTestMismatchSchemaTable.

protected void doTestMismatchSchemaTable(SchemaTableName schemaTableName, HiveStorageFormat storageFormat, List<ColumnMetadata> tableBefore, MaterializedResult dataBefore, List<ColumnMetadata> tableAfter, MaterializedResult dataAfter, List<RowExpression> afterFilters, List<Predicate<MaterializedRow>> afterResultPredicates) throws Exception {
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    doCreateEmptyTable(schemaTableName, storageFormat, tableBefore);
    // insert the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TEST_HIVE_PAGE_SINK_CONTEXT);
        sink.appendPage(dataBefore.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // load the table and verify the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), dataBefore.getMaterializedRows());
        transaction.commit();
    }
    // alter the table schema
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
        PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session);
        Table oldTable = transaction.getMetastore().getTable(metastoreContext, schemaName, tableName).get();
        HiveTypeTranslator hiveTypeTranslator = new HiveTypeTranslator();
        List<Column> dataColumns = tableAfter.stream().filter(columnMetadata -> !columnMetadata.getName().equals("ds")).map(columnMetadata -> new Column(columnMetadata.getName(), toHiveType(hiveTypeTranslator, columnMetadata.getType()), Optional.empty(), Optional.empty())).collect(toList());
        Table.Builder newTable = Table.builder(oldTable).setDataColumns(dataColumns);
        transaction.getMetastore().replaceView(metastoreContext, schemaName, tableName, newTable.build(), principalPrivileges);
        transaction.commit();
    }
    // load the altered table and verify the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), dataAfter.getMaterializedRows());
        int filterCount = afterFilters.size();
        for (int i = 0; i < filterCount; i++) {
            RowExpression predicate = afterFilters.get(i);
            ConnectorTableLayoutHandle layoutHandle = pushdownFilter(session, metadata, transaction.getMetastore(), ROW_EXPRESSION_SERVICE, FUNCTION_RESOLUTION, hivePartitionManager, METADATA.getFunctionAndTypeManager(), tableHandle, predicate, Optional.empty()).getLayout().getHandle();
            // Read all columns with a filter
            MaterializedResult filteredResult = readTable(transaction, tableHandle, layoutHandle, columnHandles, session, OptionalInt.empty(), Optional.empty());
            Predicate<MaterializedRow> rowPredicate = afterResultPredicates.get(i);
            List<MaterializedRow> expectedRows = dataAfter.getMaterializedRows().stream().filter(rowPredicate::apply).collect(toList());
            assertEqualsIgnoreOrder(filteredResult.getMaterializedRows(), expectedRows);
            // Read all columns except the ones used in the filter
            Set<String> filterColumnNames = extractUnique(predicate).stream().map(VariableReferenceExpression::getName).collect(toImmutableSet());
            List<ColumnHandle> nonFilterColumns = columnHandles.stream().filter(column -> !filterColumnNames.contains(((HiveColumnHandle) column).getName())).collect(toList());
            int resultCount = readTable(transaction, tableHandle, layoutHandle, nonFilterColumns, session, OptionalInt.empty(), Optional.empty()).getRowCount();
            assertEquals(resultCount, expectedRows.size());
        }
        transaction.commit();
    }
    // insertions to the partitions with type mismatches should fail
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TEST_HIVE_PAGE_SINK_CONTEXT);
        sink.appendPage(dataAfter.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
        fail("expected exception");
    } catch (PrestoException e) {
        // expected
        assertEquals(e.getErrorCode(), HIVE_PARTITION_SCHEMA_MISMATCH.toErrorCode());
    }
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) SkipException(org.testng.SkipException) WarningCollector(com.facebook.presto.spi.WarningCollector) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) DateTimeZone(org.joda.time.DateTimeZone) SORTED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.SORTED_BY_PROPERTY) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) FileSystem(org.apache.hadoop.fs.FileSystem) PrestoPrincipal(com.facebook.presto.spi.security.PrestoPrincipal) CounterStat(com.facebook.airlift.stats.CounterStat) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) FileStatus(org.apache.hadoop.fs.FileStatus) MoreFutures.getFutureValue(com.facebook.airlift.concurrent.MoreFutures.getFutureValue) MAX_PARTITION_KEY_COLUMN_INDEX(com.facebook.presto.hive.HiveColumnHandle.MAX_PARTITION_KEY_COLUMN_INDEX) NOT_PARTITIONED(com.facebook.presto.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) Slices(io.airlift.slice.Slices) Map(java.util.Map) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) NullableValue(com.facebook.presto.common.predicate.NullableValue) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) TABLE(com.facebook.presto.hive.CacheQuotaScope.TABLE) ZoneId(java.time.ZoneId) HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) Predicate(com.google.common.base.Predicate) HiveColumnStatistics.createBinaryColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) HivePrivilegeInfo(com.facebook.presto.hive.metastore.HivePrivilegeInfo) StandardTypes(com.facebook.presto.common.type.StandardTypes) Table(com.facebook.presto.hive.metastore.Table) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) BUCKET_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) HiveTestUtils.arrayType(com.facebook.presto.hive.HiveTestUtils.arrayType) HiveUtil.columnExtraInfo(com.facebook.presto.hive.HiveUtil.columnExtraInfo) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) REAL(com.facebook.presto.common.type.RealType.REAL) PAGEFILE(com.facebook.presto.hive.HiveStorageFormat.PAGEFILE) OptionalLong(java.util.OptionalLong) ROLLBACK_AFTER_SINK_FINISH(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) GIGABYTE(io.airlift.units.DataSize.Unit.GIGABYTE) HIVE_BYTE(com.facebook.presto.hive.HiveType.HIVE_BYTE) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) Assertions.assertInstanceOf(com.facebook.airlift.testing.Assertions.assertInstanceOf) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) MetastoreCacheScope(com.facebook.presto.hive.metastore.CachingHiveMetastore.MetastoreCacheScope) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) ThriftHiveMetastore(com.facebook.presto.hive.metastore.thrift.ThriftHiveMetastore) AfterClass(org.testng.annotations.AfterClass) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) HostAndPort(com.google.common.net.HostAndPort) Domain(com.facebook.presto.common.predicate.Domain) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) SqlFunctionId(com.facebook.presto.spi.function.SqlFunctionId) TypeSignature.parseTypeSignature(com.facebook.presto.common.type.TypeSignature.parseTypeSignature) SchemaTablePrefix(com.facebook.presto.spi.SchemaTablePrefix) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) SplitSchedulingContext(com.facebook.presto.spi.connector.ConnectorSplitManager.SplitSchedulingContext) PRESTO_QUERY_ID_NAME(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_QUERY_ID_NAME) HiveColumnStatistics.createDecimalColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics) ConnectorViewDefinition(com.facebook.presto.spi.ConnectorViewDefinition) RowType(com.facebook.presto.common.type.RowType) ViewNotFoundException(com.facebook.presto.spi.ViewNotFoundException) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) ROLLBACK_AFTER_FINISH_INSERT(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) HiveFilterPushdown.pushdownFilter(com.facebook.presto.hive.rule.HiveFilterPushdown.pushdownFilter) Duration(io.airlift.units.Duration) MaterializedResult.materializeSourceDataStream(com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream) HIVE_FLOAT(com.facebook.presto.hive.HiveType.HIVE_FLOAT) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) TypeProvider(com.facebook.presto.sql.planner.TypeProvider) Locale(java.util.Locale) UNGROUPED_SCHEDULING(com.facebook.presto.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) BUCKETED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) Thread.sleep(java.lang.Thread.sleep) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) PageFilePageSource(com.facebook.presto.hive.pagefile.PageFilePageSource) ImmutableSet(com.google.common.collect.ImmutableSet) OFFLINE_DATA_DEBUG_MODE_ENABLED(com.facebook.presto.hive.HiveSessionProperties.OFFLINE_DATA_DEBUG_MODE_ENABLED) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) TestingNodeManager(com.facebook.presto.testing.TestingNodeManager) Range(com.facebook.presto.common.predicate.Range) Objects(java.util.Objects) HIVE_STRING(com.facebook.presto.hive.HiveType.HIVE_STRING) MetastoreUtil.toPartitionValues(com.facebook.presto.hive.metastore.MetastoreUtil.toPartitionValues) HiveColumnStatistics.createBooleanColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) METADATA(com.facebook.presto.hive.HiveTestUtils.METADATA) HiveTestUtils.getDefaultHiveBatchPageSourceFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveBatchPageSourceFactories) IntStream(java.util.stream.IntStream) MoreExecutors.listeningDecorator(com.google.common.util.concurrent.MoreExecutors.listeningDecorator) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) MapType(com.facebook.presto.common.type.MapType) FILTER_STATS_CALCULATOR_SERVICE(com.facebook.presto.hive.HiveTestUtils.FILTER_STATS_CALCULATOR_SERVICE) Assert.assertNull(org.testng.Assert.assertNull) HiveColumnStatistics.createIntegerColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) RcFilePageSource(com.facebook.presto.hive.rcfile.RcFilePageSource) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) OptionalDouble(java.util.OptionalDouble) Assert.assertEquals(org.testng.Assert.assertEquals) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) OptionalInt(java.util.OptionalInt) HashSet(java.util.HashSet) ROLLBACK_AFTER_APPEND_PAGE(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) HIVE_DOUBLE(com.facebook.presto.hive.HiveType.HIVE_DOUBLE) HiveCluster(com.facebook.presto.hive.metastore.thrift.HiveCluster) HiveTestUtils.mapType(com.facebook.presto.hive.HiveTestUtils.mapType) Math.toIntExact(java.lang.Math.toIntExact) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) Type(com.facebook.presto.common.type.Type) ExecutorService(java.util.concurrent.ExecutorService) NamedTypeSignature(com.facebook.presto.common.type.NamedTypeSignature) ConnectorInsertTableHandle(com.facebook.presto.spi.ConnectorInsertTableHandle) DEFAULT_COLUMN_CONVERTER_PROVIDER(com.facebook.presto.hive.HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER) USER(com.facebook.presto.spi.security.PrincipalType.USER) HiveTestUtils.getDefaultHiveSelectivePageSourceFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveSelectivePageSourceFactories) HiveColumnStatistics.createStringColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createStringColumnStatistics) CachingHiveMetastore(com.facebook.presto.hive.metastore.CachingHiveMetastore) UTF_8(java.nio.charset.StandardCharsets.UTF_8) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) Assert.fail(org.testng.Assert.fail) DateTime(org.joda.time.DateTime) PageSinkContext(com.facebook.presto.spi.PageSinkContext) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) MetastoreUtil.createDirectory(com.facebook.presto.hive.metastore.MetastoreUtil.createDirectory) TRANSACTION_CONFLICT(com.facebook.presto.spi.StandardErrorCode.TRANSACTION_CONFLICT) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) Hashing.sha256(com.google.common.hash.Hashing.sha256) MaterializedResult(com.facebook.presto.testing.MaterializedResult) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPartitioningMetadata(com.facebook.presto.spi.connector.ConnectorPartitioningMetadata) BUCKET_COUNT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) Assert.assertTrue(org.testng.Assert.assertTrue) PlanBuilder.expression(com.facebook.presto.sql.planner.iterative.rule.test.PlanBuilder.expression) HiveTestUtils.getDefaultHiveRecordCursorProvider(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) HIVE_COMPATIBLE(com.facebook.presto.hive.BucketFunctionType.HIVE_COMPATIBLE) Page(com.facebook.presto.common.Page) Arrays(java.util.Arrays) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) HIVE_SHORT(com.facebook.presto.hive.HiveType.HIVE_SHORT) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) SqlInvokedFunction(com.facebook.presto.spi.function.SqlInvokedFunction) ROLLBACK_AFTER_BEGIN_INSERT(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) HiveColumnStatistics.createDoubleColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics) BigDecimal(java.math.BigDecimal) TupleDomain.withColumnDomains(com.facebook.presto.common.predicate.TupleDomain.withColumnDomains) Sets.difference(com.google.common.collect.Sets.difference) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) ROLLBACK_AFTER_DELETE(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) PRESTO_VERSION_NAME(com.facebook.presto.hive.HiveMetadata.PRESTO_VERSION_NAME) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) HIVE_LONG(com.facebook.presto.hive.HiveType.HIVE_LONG) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) Assert.assertNotEquals(org.testng.Assert.assertNotEquals) PrincipalPrivileges(com.facebook.presto.hive.metastore.PrincipalPrivileges) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TestingRowExpressionTranslator(com.facebook.presto.sql.TestingRowExpressionTranslator) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Executors(java.util.concurrent.Executors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) FeaturesConfig(com.facebook.presto.sql.analyzer.FeaturesConfig) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) Assertions.assertGreaterThan(com.facebook.airlift.testing.Assertions.assertGreaterThan) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) OrcSelectivePageSource(com.facebook.presto.hive.orc.OrcSelectivePageSource) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(com.facebook.presto.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) VariablesExtractor.extractUnique(com.facebook.presto.sql.planner.VariablesExtractor.extractUnique) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) Assertions.assertGreaterThanOrEqual(com.facebook.airlift.testing.Assertions.assertGreaterThanOrEqual) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) WriteInfo(com.facebook.presto.hive.LocationService.WriteInfo) HYPER_LOG_LOG(com.facebook.presto.common.type.HyperLogLogType.HYPER_LOG_LOG) HiveBasicStatistics.createEmptyStatistics(com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics) DATE(com.facebook.presto.common.type.DateType.DATE) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ArrayList(java.util.ArrayList) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) SqlDate(com.facebook.presto.common.type.SqlDate) NON_CACHEABLE(com.facebook.presto.spi.SplitContext.NON_CACHEABLE) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) TableHandle(com.facebook.presto.spi.TableHandle) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) HiveTestUtils.rowType(com.facebook.presto.hive.HiveTestUtils.rowType) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) LongStream(java.util.stream.LongStream) Executor(java.util.concurrent.Executor) Constraint(com.facebook.presto.spi.Constraint) UTC_KEY(com.facebook.presto.common.type.TimeZoneKey.UTC_KEY) UTC(org.joda.time.DateTimeZone.UTC) TestingHiveCluster(com.facebook.presto.hive.metastore.thrift.TestingHiveCluster) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Assertions.assertLessThanOrEqual(com.facebook.airlift.testing.Assertions.assertLessThanOrEqual) ROLLBACK_RIGHT_AWAY(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) HiveMetadata.convertToPredicate(com.facebook.presto.hive.HiveMetadata.convertToPredicate) HIVE_BOOLEAN(com.facebook.presto.hive.HiveType.HIVE_BOOLEAN) BridgingHiveMetastore(com.facebook.presto.hive.metastore.thrift.BridgingHiveMetastore) HiveColumnStatistics.createDateColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createDateColumnStatistics) ValueSet(com.facebook.presto.common.predicate.ValueSet) SORTED_WRITE_TO_TEMP_PATH_ENABLED(com.facebook.presto.hive.HiveSessionProperties.SORTED_WRITE_TO_TEMP_PATH_ENABLED) MetadataManager(com.facebook.presto.metadata.MetadataManager) HiveTestUtils.getDefaultHiveFileWriterFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveType.toHiveType(com.facebook.presto.hive.HiveType.toHiveType) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) CacheConfig(com.facebook.presto.cache.CacheConfig) HiveTestUtils.getDefaultOrcFileWriterFactory(com.facebook.presto.hive.HiveTestUtils.getDefaultOrcFileWriterFactory) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) Iterables.concat(com.google.common.collect.Iterables.concat) MANAGED_TABLE(com.facebook.presto.hive.metastore.PrestoTableType.MANAGED_TABLE) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) METASTORE_CONTEXT(com.facebook.presto.hive.HiveQueryRunner.METASTORE_CONTEXT) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) String.format(java.lang.String.format) COMMIT(com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.COMMIT) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) STORAGE_FORMAT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) PAGE_SORTER(com.facebook.presto.hive.HiveTestUtils.PAGE_SORTER) RecordCursor(com.facebook.presto.spi.RecordCursor) DataSize(io.airlift.units.DataSize) List(java.util.List) PrestoTableType(com.facebook.presto.hive.metastore.PrestoTableType) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Optional(java.util.Optional) ConnectorId(com.facebook.presto.spi.ConnectorId) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) Logger(com.facebook.airlift.log.Logger) Column(com.facebook.presto.hive.metastore.Column) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) Partition(com.facebook.presto.hive.metastore.Partition) HIVE_INT(com.facebook.presto.hive.HiveType.HIVE_INT) PARTITIONED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Verify.verify(com.google.common.base.Verify.verify) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) SortingColumn(com.facebook.presto.hive.metastore.SortingColumn) RowExpression(com.facebook.presto.spi.relation.RowExpression) Assertions.assertEqualsIgnoreOrder(com.facebook.airlift.testing.Assertions.assertEqualsIgnoreOrder) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ConnectorIdentity(com.facebook.presto.spi.security.ConnectorIdentity) HiveBasicStatistics.createZeroStatistics(com.facebook.presto.hive.HiveBasicStatistics.createZeroStatistics) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) MaterializedRow(com.facebook.presto.testing.MaterializedRow) StorageFormat.fromHiveStorageFormat(com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat) RowFieldName(com.facebook.presto.common.type.RowFieldName) GroupByHashPageIndexerFactory(com.facebook.presto.GroupByHashPageIndexerFactory) Block(com.facebook.presto.common.block.Block) JoinCompiler(com.facebook.presto.sql.gen.JoinCompiler) SECONDS(java.util.concurrent.TimeUnit.SECONDS) ConnectorInsertTableHandle(com.facebook.presto.spi.ConnectorInsertTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) Column(com.facebook.presto.hive.metastore.Column) SortingColumn(com.facebook.presto.hive.metastore.SortingColumn) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) Table(com.facebook.presto.hive.metastore.Table) PrincipalPrivileges(com.facebook.presto.hive.metastore.PrincipalPrivileges) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) RowExpression(com.facebook.presto.spi.relation.RowExpression) Constraint(com.facebook.presto.spi.Constraint) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) MaterializedResult(com.facebook.presto.testing.MaterializedResult) MaterializedRow(com.facebook.presto.testing.MaterializedRow)

Example 18 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class AbstractTestHiveFileFormats method createTestFile.

public static FileSplit createTestFile(String filePath, HiveStorageFormat storageFormat, HiveCompressionCodec compressionCodec, List<TestColumn> testColumns, ConnectorSession session, int numRows, HiveFileWriterFactory fileWriterFactory) {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    List<Type> types = testColumns.stream().map(TestColumn::getType).map(HiveType::valueOf).map(type -> type.getType(FUNCTION_AND_TYPE_MANAGER)).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(types);
    for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
        pageBuilder.declarePosition();
        for (int columnNumber = 0; columnNumber < testColumns.size(); columnNumber++) {
            serializeObject(types.get(columnNumber), pageBuilder.getBlockBuilder(columnNumber), testColumns.get(columnNumber).getWriteValue(), testColumns.get(columnNumber).getObjectInspector(), false);
        }
    }
    Page page = pageBuilder.build();
    JobConf jobConf = configureCompression(new JobConf(), compressionCodec);
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    Optional<HiveFileWriter> fileWriter = fileWriterFactory.createFileWriter(new Path(filePath), testColumns.stream().map(TestColumn::getName).collect(toList()), StorageFormat.fromHiveStorageFormat(storageFormat), tableProperties, jobConf, session, Optional.empty());
    HiveFileWriter hiveFileWriter = fileWriter.orElseThrow(() -> new IllegalArgumentException("fileWriterFactory"));
    hiveFileWriter.appendRows(page);
    Optional<Page> fileStatistics = hiveFileWriter.commit();
    assertFileStatistics(fileStatistics, hiveFileWriter.getFileSizeInBytes(), storageFormat);
    return new FileSplit(new Path(filePath), 0, new File(filePath).length(), new String[0]);
}
Also used : CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) JavaHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) HiveManifestUtils.getFileSize(com.facebook.presto.hive.HiveManifestUtils.getFileSize) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) PageBuilder(com.facebook.presto.common.PageBuilder) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) BigDecimal(java.math.BigDecimal) FileSplit(org.apache.hadoop.mapred.FileSplit) Predicates.not(com.google.common.base.Predicates.not) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) StructuralTestUtil.decimalArrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.decimalArrayBlockOf) PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector) ConnectorSession(com.facebook.presto.spi.ConnectorSession) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) JavaHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) SliceOutput(io.airlift.slice.SliceOutput) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) HiveUtil.isStructuralType(com.facebook.presto.hive.HiveUtil.isStructuralType) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) SqlDate(com.facebook.presto.common.type.SqlDate) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) BlockEncodingManager(com.facebook.presto.common.block.BlockEncodingManager) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Arrays.fill(java.util.Arrays.fill) Properties(java.util.Properties) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) UTC(org.joda.time.DateTimeZone.UTC) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ConfigurationUtils.configureCompression(com.facebook.presto.hive.util.ConfigurationUtils.configureCompression) SerDeUtils.serializeObject(com.facebook.presto.hive.util.SerDeUtils.serializeObject) RowType(com.facebook.presto.common.type.RowType) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) Iterables.transform(com.google.common.collect.Iterables.transform) MaterializedResult.materializeSourceDataStream(com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Path(org.apache.hadoop.fs.Path) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) DateTimeFormat(org.joda.time.format.DateTimeFormat) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Timestamp(java.sql.Timestamp) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) HiveOutputFormat(org.apache.hadoop.hive.ql.io.HiveOutputFormat) Preconditions.checkState(com.google.common.base.Preconditions.checkState) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) Optional(java.util.Optional) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) StructuralTestUtil.decimalMapBlockOf(com.facebook.presto.tests.StructuralTestUtil.decimalMapBlockOf) TimestampType(com.facebook.presto.common.type.TimestampType) TypeInfoFactory.getCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo) Strings.padEnd(com.google.common.base.Strings.padEnd) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) BlockSerdeUtil(com.facebook.presto.common.block.BlockSerdeUtil) BlockEncodingSerde(com.facebook.presto.common.block.BlockEncodingSerde) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) StructuralTestUtil(com.facebook.presto.tests.StructuralTestUtil) Objects.requireNonNull(java.util.Objects.requireNonNull) HiveTestUtils.mapType(com.facebook.presto.hive.HiveTestUtils.mapType) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Type(com.facebook.presto.common.type.Type) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DateTime(org.joda.time.DateTime) HIVE_DEFAULT_DYNAMIC_PARTITION(com.facebook.presto.hive.metastore.MetastoreUtil.HIVE_DEFAULT_DYNAMIC_PARTITION) Decimals(com.facebook.presto.common.type.Decimals) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) Date(java.sql.Date) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) MaterializedResult(com.facebook.presto.testing.MaterializedResult) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) MaterializedRow(com.facebook.presto.testing.MaterializedRow) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Block(com.facebook.presto.common.block.Block) DateType(com.facebook.presto.common.type.DateType) Path(org.apache.hadoop.fs.Path) Page(com.facebook.presto.common.Page) PageBuilder(com.facebook.presto.common.PageBuilder) Properties(java.util.Properties) FileSplit(org.apache.hadoop.mapred.FileSplit) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) DecimalType(com.facebook.presto.common.type.DecimalType) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) HiveUtil.isStructuralType(com.facebook.presto.hive.HiveUtil.isStructuralType) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) RowType(com.facebook.presto.common.type.RowType) TimestampType(com.facebook.presto.common.type.TimestampType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) HiveTestUtils.mapType(com.facebook.presto.hive.HiveTestUtils.mapType) Type(com.facebook.presto.common.type.Type) DateType(com.facebook.presto.common.type.DateType) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File)

Example 19 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class TestHiveFileFormats method testPageSourceFactory.

private void testPageSourceFactory(HiveBatchPageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) throws IOException {
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
    List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
    List<Column> tableDataColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty())).collect(toImmutableList());
    List<HiveColumnHandle> columnHandles = getColumnHandles(testColumns);
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(), ImmutableSet.of(sourceFactory), new Configuration(), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(), new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()), TupleDomain.all(), columnHandles, ImmutableMap.of(), partitionKeys, DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"), partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(), TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT, TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testColumns, getTypes(columnHandles), rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Storage(com.facebook.presto.hive.metastore.Storage) Column(com.facebook.presto.hive.metastore.Column)

Example 20 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class TestHiveFileFormats method testCursorProvider.

private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) {
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
    List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
    List<Column> tableDataColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty())).collect(toImmutableList());
    Configuration configuration = new Configuration();
    configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(cursorProvider), ImmutableSet.of(), configuration, session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(), new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()), TupleDomain.all(), getColumnHandles(testColumns), ImmutableMap.of(), partitionKeys, DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"), partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(), TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT, TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
    RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
    checkCursor(cursor, testColumns, rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RecordCursor(com.facebook.presto.spi.RecordCursor) Configuration(org.apache.hadoop.conf.Configuration) LzoCodec(io.airlift.compress.lzo.LzoCodec) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) RecordPageSource(com.facebook.presto.spi.RecordPageSource) Storage(com.facebook.presto.hive.metastore.Storage) Column(com.facebook.presto.hive.metastore.Column)

Aggregations

StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)20 ConnectorSession (com.facebook.presto.spi.ConnectorSession)15 List (java.util.List)15 Objects.requireNonNull (java.util.Objects.requireNonNull)15 Optional (java.util.Optional)15 Path (org.apache.hadoop.fs.Path)15 PrestoException (com.facebook.presto.spi.PrestoException)14 ImmutableMap (com.google.common.collect.ImmutableMap)14 IOException (java.io.IOException)14 Collectors.toList (java.util.stream.Collectors.toList)14 DateTimeZone (org.joda.time.DateTimeZone)14 SchemaTableName (com.facebook.presto.spi.SchemaTableName)12 Type (com.facebook.presto.common.type.Type)11 Test (org.testng.annotations.Test)11 DWRF (com.facebook.presto.hive.HiveStorageFormat.DWRF)10 ORC (com.facebook.presto.hive.HiveStorageFormat.ORC)10 PARQUET (com.facebook.presto.hive.HiveStorageFormat.PARQUET)10 Column (com.facebook.presto.hive.metastore.Column)10 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)10 REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR)9