
Example 71 with ConnectorSession

Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.

From the class AbstractTestHive, method doInsertUnsupportedWriteType.

private void doInsertUnsupportedWriteType(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    List<Column> columns = ImmutableList.of(new Column("dummy", HiveType.valueOf("uniontype<smallint,tinyint>"), Optional.empty()));
    List<Column> partitionColumns = ImmutableList.of(new Column("name", HIVE_STRING, Optional.empty()));
    createEmptyTable(tableName, storageFormat, columns, partitionColumns);
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
        fail("expected failure");
    } catch (TrinoException e) {
        assertThat(e).hasMessageMatching("Inserting into Hive table .* with column type uniontype<smallint,tinyint> not supported");
    }
}
Also used : HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Column(io.trino.plugin.hive.metastore.Column) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) TrinoException(io.trino.spi.TrinoException) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)
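
The try/fail/catch idiom above can also be written with AssertJ's assertThatThrownBy, which pairs naturally with the assertThat(...) call already used in the catch block. A minimal sketch, assuming AssertJ's assertThatThrownBy is on the test classpath and the same AbstractTestHive helpers (newTransaction, newSession, getTableHandle) are in scope:

// Sketch only: equivalent assertion to the try/fail/catch block above, using AssertJ.
try (Transaction transaction = newTransaction()) {
    ConnectorMetadata metadata = transaction.getMetadata();
    ConnectorSession session = newSession();
    ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
    assertThatThrownBy(() -> metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES))
            .isInstanceOf(TrinoException.class)
            .hasMessageMatching("Inserting into Hive table .* with column type uniontype<smallint,tinyint> not supported");
}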

Example 72 with ConnectorSession

Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.

From the class AbstractTestHive, method doTestBucketedTableValidation.

private void doTestBucketedTableValidation(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    prepareInvalidBuckets(storageFormat, tableName);
    // read succeeds when validation is disabled
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of("validate_bucketing", false));
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        // fewer rows due to deleted file
        assertEquals(result.getRowCount(), 87);
    }
    // read fails due to validation failure
    assertReadFailsWithMessageMatching(storageFormat, tableName, "Hive table is corrupt\\. File '.*/000002_0_.*' is for bucket 2, but contains a row for bucket 5.");
}
Also used : HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)
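
In the snippet above, newSession(ImmutableMap.of("validate_bucketing", false)) produces a session whose validate_bucketing Hive session property is overridden. A minimal sketch of building such a session directly, modeled on Example 75 below; the setPropertyValues call on TestingConnectorSession.Builder is an assumption, and this wiring is not copied from AbstractTestHive:

// Sketch only: a ConnectorSession whose validate_bucketing session property is set to false.
// HiveSessionProperties construction mirrors Example 75; setPropertyValues is assumed to be
// the builder method for overriding individual property values.
HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(
        new HiveConfig(),
        new OrcReaderConfig(),
        new OrcWriterConfig(),
        new ParquetReaderConfig(),
        new ParquetWriterConfig());
ConnectorSession session = TestingConnectorSession.builder()
        .setPropertyMetadata(hiveSessionProperties.getSessionProperties())
        .setPropertyValues(ImmutableMap.of("validate_bucketing", false))
        .build();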

Example 73 with ConnectorSession

Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.

From the class AbstractTestHive, method testNewDirectoryPermissions.

@Test
public void testNewDirectoryPermissions() throws Exception {
    SchemaTableName tableName = temporaryTable("empty_file");
    List<Column> columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty()));
    createEmptyTable(tableName, ORC, columns, ImmutableList.of(), Optional.empty());
    try {
        Transaction transaction = newTransaction();
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        Table table = transaction.getMetastore().getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow();
        // create new directory and set directory permission after creation
        HdfsContext context = new HdfsContext(session);
        Path location = new Path(table.getStorage().getLocation());
        Path defaultPath = new Path(location + "/defaultperms");
        createDirectory(context, hdfsEnvironment, defaultPath);
        FileStatus defaultFsStatus = hdfsEnvironment.getFileSystem(context, defaultPath).getFileStatus(defaultPath);
        assertEquals(defaultFsStatus.getPermission().toOctal(), 777);
        // use hdfs config that skips setting directory permissions after creation
        HdfsConfig configWithSkip = new HdfsConfig();
        configWithSkip.setNewDirectoryPermissions(HdfsConfig.SKIP_DIR_PERMISSIONS);
        HdfsEnvironment hdfsEnvironmentWithSkip = new HdfsEnvironment(createTestHdfsConfiguration(), configWithSkip, new NoHdfsAuthentication());
        Path skipPath = new Path(location + "/skipperms");
        createDirectory(context, hdfsEnvironmentWithSkip, skipPath);
        FileStatus skipFsStatus = hdfsEnvironmentWithSkip.getFileSystem(context, skipPath).getFileStatus(skipPath);
        assertEquals(skipFsStatus.getPermission().toOctal(), 755);
    } finally {
        dropTable(tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) Table(io.trino.plugin.hive.metastore.Table) FileStatus(org.apache.hadoop.fs.FileStatus) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Column(io.trino.plugin.hive.metastore.Column) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) Test(org.testng.annotations.Test)
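
The assertions compare against the literals 777 and 755 because FsPermission.toOctal() returns the permission's octal digits packed into a plain short: rwxrwxrwx becomes 777 and rwxr-xr-x becomes 755. A small self-contained sketch of that behavior using Hadoop's FsPermission, independent of any HDFS environment:

// Sketch only: FsPermission.toOctal() returns the octal digits as a plain short,
// which is why the test above compares against 777 and 755.
import org.apache.hadoop.fs.permission.FsPermission;

public class FsPermissionOctalExample
{
    public static void main(String[] args)
    {
        FsPermission allAccess = new FsPermission((short) 0777);   // rwxrwxrwx
        FsPermission defaultDirs = new FsPermission((short) 0755); // rwxr-xr-x
        System.out.println(allAccess.toOctal());    // prints 777
        System.out.println(defaultDirs.toOctal());  // prints 755
    }
}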

Example 74 with ConnectorSession

Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.

From the class TestHiveFileFormats, method createPageSourceFromCursorProvider.

private ConnectorPageSource createPageSourceFromCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, Properties splitProperties, long fileSize, List<TestColumn> testReadColumns, ConnectorSession session) {
    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn testReadColumn : testReadColumns) {
        String name = testReadColumn.getBaseName();
        if (!baseColumnNames.contains(name) && !testReadColumn.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(testReadColumn.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream().collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream().collect(Collectors.joining(",")));
    List<HivePartitionKey> partitionKeys = testReadColumns.stream().filter(TestColumn::isPartitionKey).map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue())).collect(toList());
    String partitionName = String.join("/", partitionKeys.stream().map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue())).collect(toImmutableList()));
    Configuration configuration = new Configuration(false);
    configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());
    List<HiveColumnHandle> columnHandles = getColumnHandles(testReadColumns);
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(partitionName, partitionKeys, columnHandles, ImmutableList.of(), TableToPartitionMapping.empty(), split.getPath(), OptionalInt.empty(), fileSize, Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(), ImmutableSet.of(cursorProvider), configuration, session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), fileSize, splitProperties, TupleDomain.all(), columnHandles, TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), false, Optional.empty(), false, NO_ACID_TRANSACTION, columnMappings);
    return pageSource.get();
}
Also used : OrcFileWriterFactory(io.trino.plugin.hive.orc.OrcFileWriterFactory) ParquetFileWriterFactory(io.trino.plugin.hive.parquet.ParquetFileWriterFactory) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) HiveTestUtils.createGenericHiveRecordCursorProvider(io.trino.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) TrinoExceptionAssert.assertTrinoExceptionThrownBy(io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy) PARQUET(io.trino.plugin.hive.HiveStorageFormat.PARQUET) FileSplit(org.apache.hadoop.mapred.FileSplit) Locale(java.util.Locale) Configuration(org.apache.hadoop.conf.Configuration) StructuralTestUtil.rowBlockOf(io.trino.testing.StructuralTestUtil.rowBlockOf) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) AVRO(io.trino.plugin.hive.HiveStorageFormat.AVRO) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) LzoCodec(io.airlift.compress.lzo.LzoCodec) ImmutableSet(com.google.common.collect.ImmutableSet) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) ColumnMapping.buildColumnMappings(io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.buildColumnMappings) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) RcFilePageSourceFactory(io.trino.plugin.hive.rcfile.RcFilePageSourceFactory) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Type(io.trino.spi.type.Type) Assert.assertEquals(org.testng.Assert.assertEquals) CSV(io.trino.plugin.hive.HiveStorageFormat.CSV) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ParquetPageSourceFactory(io.trino.plugin.hive.parquet.ParquetPageSourceFactory) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ParquetWriterConfig(io.trino.plugin.hive.parquet.ParquetWriterConfig) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SEQUENCEFILE(io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE) OrcReaderOptions(io.trino.orc.OrcReaderOptions) OrcPageSourceFactory(io.trino.plugin.hive.orc.OrcPageSourceFactory) RecordPageSource(io.trino.spi.connector.RecordPageSource) Objects.requireNonNull(java.util.Objects.requireNonNull) TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) JSON(io.trino.plugin.hive.HiveStorageFormat.JSON) OrcWriterConfig(io.trino.plugin.hive.orc.OrcWriterConfig) RCBINARY(io.trino.plugin.hive.HiveStorageFormat.RCBINARY) RecordCursor(io.trino.spi.connector.RecordCursor) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveTestUtils.getTypes(io.trino.plugin.hive.HiveTestUtils.getTypes) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TupleDomain(io.trino.spi.predicate.TupleDomain) UTC(org.joda.time.DateTimeZone.UTC) File(java.io.File) TestingConnectorSession(io.trino.testing.TestingConnectorSession) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) OrcWriterOptions(io.trino.orc.OrcWriterOptions) RCTEXT(io.trino.plugin.hive.HiveStorageFormat.RCTEXT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector)
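
The "columns" and "columns.types" split properties and the partition name built above are plain comma- and slash-joined strings. A small self-contained sketch with hypothetical column names, types, and a partition key (none of them taken from TestHiveFileFormats) shows the resulting values:

// Sketch only: the shape of the split properties and partition name; the column and
// partition names below are made up for illustration.
import java.util.List;
import java.util.Properties;
import static java.lang.String.format;

public class SplitPropertiesExample
{
    public static void main(String[] args)
    {
        Properties splitProperties = new Properties();
        splitProperties.setProperty("columns", String.join(",", List.of("t_string", "t_int")));
        splitProperties.setProperty("columns.types", String.join(",", List.of("string", "int")));

        // Partition keys are rendered as name=value pairs joined with '/'
        String partitionName = String.join("/", List.of(format("%s=%s", "ds", "2022-01-01")));

        System.out.println(splitProperties.getProperty("columns"));        // t_string,t_int
        System.out.println(splitProperties.getProperty("columns.types"));  // string,int
        System.out.println(partitionName);                                 // ds=2022-01-01
    }
}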

Example 75 with ConnectorSession

Use of io.trino.spi.connector.ConnectorSession in project trino by trinodb.

From the class TestHiveFileFormats, method testOrcOptimizedWriter.

@Test(dataProvider = "validRowAndFileSizePadding")
public void testOrcOptimizedWriter(int rowCount, long fileSizePadding) throws Exception {
    HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(new HiveConfig(), new OrcReaderConfig(), new OrcWriterConfig().setValidationPercentage(100.0), new ParquetReaderConfig(), new ParquetWriterConfig());
    ConnectorSession session = TestingConnectorSession.builder().setPropertyMetadata(hiveSessionProperties.getSessionProperties()).build();
    // A Trino page cannot contain a map with null keys, so a page based writer cannot write null keys
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(TestHiveFileFormats::withoutNullMapKeyTests).collect(toList());
    assertThatFileFormat(ORC).withColumns(testColumns).withRowsCount(rowCount).withSession(session).withFileSizePadding(fileSizePadding).withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), STATS, new OrcWriterOptions())).isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)).isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
Also used : ParquetWriterConfig(io.trino.plugin.hive.parquet.ParquetWriterConfig) OrcWriterConfig(io.trino.plugin.hive.orc.OrcWriterConfig) OrcPageSourceFactory(io.trino.plugin.hive.orc.OrcPageSourceFactory) OrcFileWriterFactory(io.trino.plugin.hive.orc.OrcFileWriterFactory) OrcWriterOptions(io.trino.orc.OrcWriterOptions) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) OrcReaderOptions(io.trino.orc.OrcReaderOptions) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) Test(org.testng.annotations.Test)
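
The test is driven by the validRowAndFileSizePadding TestNG data provider, which supplies (rowCount, fileSizePadding) pairs. A hypothetical sketch of a provider with that shape; the actual values used in TestHiveFileFormats are not reproduced here:

// Hypothetical sketch of a TestNG data provider matching the parameters of testOrcOptimizedWriter:
// each row is {rowCount, fileSizePadding}. The values below are illustrative only.
@DataProvider(name = "validRowAndFileSizePadding")
public static Object[][] validRowAndFileSizePadding()
{
    return new Object[][] {
            {0, 0L},
            {1000, 0L},
            {1000, 512L},
    };
}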

Aggregations

ConnectorSession (io.trino.spi.connector.ConnectorSession): 252
SchemaTableName (io.trino.spi.connector.SchemaTableName): 131
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 122
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 119
List (java.util.List): 111
ColumnHandle (io.trino.spi.connector.ColumnHandle): 108
Optional (java.util.Optional): 107
ImmutableList (com.google.common.collect.ImmutableList): 98
Objects.requireNonNull (java.util.Objects.requireNonNull): 97
TrinoException (io.trino.spi.TrinoException): 94
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 92
Map (java.util.Map): 88
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 87
ImmutableMap (com.google.common.collect.ImmutableMap): 85
TupleDomain (io.trino.spi.predicate.TupleDomain): 85
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 83
Test (org.testng.annotations.Test): 82
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 80
Constraint (io.trino.spi.connector.Constraint): 76
Type (io.trino.spi.type.Type): 72