Search in sources :

Example 26 with CacheConfig

use of com.facebook.presto.cache.CacheConfig in project presto by prestodb.

The following example is from the class TestAlluxioCachingFileSystem, method testBasicReadWithAsyncRestoreFailure.

@Test(timeOut = 30_000)
public void testBasicReadWithAsyncRestoreFailure() throws Exception {
    // Revoke write permission on the cache directory so the asynchronous cache
    // restore fails; the cache should then report READ_ONLY or NOT_IN_USE, and
    // every read below must still succeed by falling back to the external source.
    // Local renamed (was 'cacheDirectory') to stop shadowing the field of the same name.
    File unwritableCacheDirectory = new File(this.cacheDirectory.getPath());
    // setWritable returns false on failure; the original ignored the result, which
    // would silently invalidate the test's premise (e.g. when running as root).
    assertTrue(unwritableCacheDirectory.setWritable(false), "failed to revoke write permission on cache directory");
    CacheConfig cacheConfig = new CacheConfig().setCacheType(ALLUXIO).setCachingEnabled(true).setBaseDirectory(this.cacheDirectory);
    AlluxioCacheConfig alluxioCacheConfig = new AlluxioCacheConfig();
    Configuration configuration = getHdfsConfiguration(cacheConfig, alluxioCacheConfig);
    configuration.set("alluxio.user.client.cache.async.restore.enabled", String.valueOf(true));
    try {
        AlluxioCachingFileSystem fileSystem = cachingFileSystem(configuration);
        long state = MetricsSystem.counter(MetricKey.CLIENT_CACHE_STATE.getName()).getCount();
        assertTrue(state == CacheManager.State.READ_ONLY.getValue() || state == CacheManager.State.NOT_IN_USE.getValue());
        // different cases of read can still proceed even cache is read-only or not-in-use
        byte[] buffer = new byte[PAGE_SIZE * 2];
        int pageOffset = PAGE_SIZE;
        // new read: nothing cached, a whole page is fetched externally
        resetBaseline();
        assertEquals(readFully(fileSystem, pageOffset + 10, buffer, 0, 100), 100);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 0);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 100);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, PAGE_SIZE);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, 0);
        validateBuffer(data, pageOffset + 10, buffer, 0, 100);
        // read within the cached page; cache is unusable so bytes still come externally
        resetBaseline();
        assertEquals(readFully(fileSystem, pageOffset + 20, buffer, 0, 90), 90);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 0);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 90);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, PAGE_SIZE);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, 0);
        validateBuffer(data, pageOffset + 20, buffer, 0, 90);
        // read partially after the range of the cache (straddles two pages)
        resetBaseline();
        assertEquals(readFully(fileSystem, pageOffset + PAGE_SIZE - 10, buffer, 0, 100), 100);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 0);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 100);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, 2 * PAGE_SIZE);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, 0);
        validateBuffer(data, pageOffset + PAGE_SIZE - 10, buffer, 0, 100);
        // read partially before the range of the cache (straddles two pages)
        resetBaseline();
        assertEquals(readFully(fileSystem, pageOffset - 10, buffer, 10, 50), 50);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 0);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 50);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, 2 * PAGE_SIZE);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, 0);
        validateBuffer(data, pageOffset - 10, buffer, 10, 50);
        // skip one page: a single new page is fetched for the far read
        resetBaseline();
        assertEquals(readFully(fileSystem, pageOffset + PAGE_SIZE * 3, buffer, 40, 50), 50);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 0);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 50);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, PAGE_SIZE);
        checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, 0);
        validateBuffer(data, pageOffset + PAGE_SIZE * 2 - 10, buffer, 400, PAGE_SIZE + 20);
        state = MetricsSystem.counter(MetricKey.CLIENT_CACHE_STATE.getName()).getCount();
        assertTrue(state == CacheManager.State.READ_ONLY.getValue() || state == CacheManager.State.NOT_IN_USE.getValue());
    } finally {
        // Restore write permission so later tests sharing the directory are unaffected.
        unwritableCacheDirectory.setWritable(true);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) File(java.io.File) CacheConfig(com.facebook.presto.cache.CacheConfig) Test(org.testng.annotations.Test) TestingCacheUtils.stressTest(com.facebook.presto.cache.TestingCacheUtils.stressTest)

Example 27 with CacheConfig

use of com.facebook.presto.cache.CacheConfig in project presto by prestodb.

The following example is from the class TestAlluxioCachingFileSystem, method testQuotaBasics.

@Test(timeOut = 30_000)
public void testQuotaBasics() throws Exception {
    // Use a 1 KB per-table quota: the first small read fits within it,
    // the second large read exceeds it and forces evictions.
    CacheQuota cacheQuota = new CacheQuota("test.table", Optional.of(DataSize.succinctDataSize(1, KILOBYTE)));
    CacheConfig cacheConfig = new CacheConfig()
            .setCacheType(ALLUXIO)
            .setCachingEnabled(true)
            .setBaseDirectory(cacheDirectory)
            .setValidationEnabled(false)
            .setCacheQuotaScope(TABLE);
    AlluxioCacheConfig alluxioCacheConfig = new AlluxioCacheConfig().setCacheQuotaEnabled(true);
    AlluxioCachingFileSystem fileSystem = cachingFileSystem(getHdfsConfiguration(cacheConfig, alluxioCacheConfig));
    byte[] readBuffer = new byte[10240];
    // Read within the cache quota: bytes come externally, one full page is cached.
    resetBaseline();
    assertEquals(readFully(fileSystem, cacheQuota, 42, readBuffer, 0, 100), 100);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 0);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 100);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, PAGE_SIZE);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, 0);
    validateBuffer(data, 42, readBuffer, 0, 100);
    // Read beyond the cache quota: only the already-cached tail of the first page
    // is a cache hit; the rest is fetched externally and cached pages get evicted
    // to keep the footprint within the quota.
    resetBaseline();
    assertEquals(readFully(fileSystem, cacheQuota, 47, readBuffer, 0, 9000), 9000);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, PAGE_SIZE - 47);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 9000 - PAGE_SIZE + 47);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, (9000 / PAGE_SIZE) * PAGE_SIZE);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, (9000 / PAGE_SIZE) * PAGE_SIZE);
    validateBuffer(data, 47, readBuffer, 0, 9000);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DataSize(io.airlift.units.DataSize) CacheConfig(com.facebook.presto.cache.CacheConfig) CacheQuota(com.facebook.presto.hive.CacheQuota) Test(org.testng.annotations.Test) TestingCacheUtils.stressTest(com.facebook.presto.cache.TestingCacheUtils.stressTest)

Example 28 with CacheConfig

use of com.facebook.presto.cache.CacheConfig in project presto by prestodb.

The following example is from the class TestAlluxioCachingFileSystem, method testQuotaUpdated.

@Test(timeOut = 30_000)
public void testQuotaUpdated() throws Exception {
    // Start with a 1 KB per-table quota that is too small for a 9000-byte read.
    CacheQuota smallCacheQuota = new CacheQuota("test.table", Optional.of(DataSize.succinctDataSize(1, KILOBYTE)));
    CacheConfig cacheConfig = new CacheConfig()
            .setCacheType(ALLUXIO)
            .setCachingEnabled(true)
            .setBaseDirectory(cacheDirectory)
            .setValidationEnabled(false)
            .setCacheQuotaScope(TABLE);
    AlluxioCacheConfig alluxioCacheConfig = new AlluxioCacheConfig().setCacheQuotaEnabled(true);
    AlluxioCachingFileSystem fileSystem = cachingFileSystem(getHdfsConfiguration(cacheConfig, alluxioCacheConfig));
    byte[] readBuffer = new byte[10240];
    // Read beyond the small cache quota: everything is fetched externally and
    // full pages are evicted to honor the quota.
    resetBaseline();
    assertEquals(readFully(fileSystem, smallCacheQuota, 0, readBuffer, 0, 9000), 9000);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 0);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, 9000);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, (9000 / PAGE_SIZE + 1) * PAGE_SIZE);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, (9000 / PAGE_SIZE) * PAGE_SIZE);
    validateBuffer(data, 0, readBuffer, 0, 9000);
    // Repeat the read under an enlarged 10 KB quota: the surviving partial page
    // is a cache hit, the evicted full pages are re-fetched, and nothing is evicted.
    CacheQuota largeCacheQuota = new CacheQuota("test.table", Optional.of(DataSize.succinctDataSize(10, KILOBYTE)));
    resetBaseline();
    assertEquals(readFully(fileSystem, largeCacheQuota, 0, readBuffer, 0, 9000), 9000);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE, 9000 - (9000 / PAGE_SIZE) * PAGE_SIZE);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_REQUESTED_EXTERNAL, (9000 / PAGE_SIZE) * PAGE_SIZE);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_READ_EXTERNAL, (9000 / PAGE_SIZE) * PAGE_SIZE);
    checkMetrics(MetricKey.CLIENT_CACHE_BYTES_EVICTED, 0);
    validateBuffer(data, 0, readBuffer, 0, 9000);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) CacheConfig(com.facebook.presto.cache.CacheConfig) CacheQuota(com.facebook.presto.hive.CacheQuota) Test(org.testng.annotations.Test) TestingCacheUtils.stressTest(com.facebook.presto.cache.TestingCacheUtils.stressTest)

Example 29 with CacheConfig

use of com.facebook.presto.cache.CacheConfig in project presto by prestodb.

The following example is from the class TestHivePageSink, method createPageSink.

/**
 * Builds a ConnectorPageSink that writes a new table directly into {@code outputPath}.
 * The sink uses the storage format and compression codec configured in {@code config}.
 */
private static ConnectorPageSink createPageSink(HiveTransactionHandle transaction, HiveClientConfig config, MetastoreClientConfig metastoreClientConfig, ExtendedHiveMetastore metastore, Path outputPath, HiveWriterStats stats) {
    // Write directly to the final target directory of a brand-new table.
    LocationHandle locationHandle = new LocationHandle(outputPath, outputPath, Optional.empty(), NEW, DIRECT_TO_TARGET_NEW_DIRECTORY);
    HivePageSinkMetadata pageSinkMetadata = new HivePageSinkMetadata(
            new SchemaTableName(SCHEMA_NAME, TABLE_NAME),
            metastore.getTable(METASTORE_CONTEXT, SCHEMA_NAME, TABLE_NAME),
            ImmutableMap.of());
    HiveOutputTableHandle handle = new HiveOutputTableHandle(
            SCHEMA_NAME,
            TABLE_NAME,
            getColumnHandles(),
            pageSinkMetadata,
            locationHandle,
            config.getHiveStorageFormat(),
            config.getHiveStorageFormat(),
            config.getHiveStorageFormat(),
            config.getCompressionCodec(),
            ImmutableList.of(),
            Optional.empty(),
            ImmutableList.of(),
            "test",
            ImmutableMap.of(),
            Optional.empty());
    HdfsEnvironment hdfsEnvironment = createTestHdfsEnvironment(config, metastoreClientConfig);
    GroupByHashPageIndexerFactory pageIndexerFactory =
            new GroupByHashPageIndexerFactory(new JoinCompiler(MetadataManager.createTestMetadataManager(), new FeaturesConfig()));
    HiveSessionProperties sessionProperties =
            new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig());
    HivePageSinkProvider provider = new HivePageSinkProvider(
            getDefaultHiveFileWriterFactories(config, metastoreClientConfig),
            hdfsEnvironment,
            PAGE_SORTER,
            metastore,
            pageIndexerFactory,
            FUNCTION_AND_TYPE_MANAGER,
            config,
            metastoreClientConfig,
            new HiveLocationService(hdfsEnvironment),
            HiveTestUtils.PARTITION_UPDATE_CODEC,
            HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC,
            new TestingNodeManager("fake-environment"),
            new HiveEventClient(),
            sessionProperties,
            stats,
            getDefaultOrcFileWriterFactory(config, metastoreClientConfig),
            HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    return provider.createPageSink(transaction, getSession(config), handle, TEST_HIVE_PAGE_SINK_CONTEXT);
}
Also used : JoinCompiler(com.facebook.presto.sql.gen.JoinCompiler) FeaturesConfig(com.facebook.presto.sql.analyzer.FeaturesConfig) HivePageSinkMetadata(com.facebook.presto.hive.metastore.HivePageSinkMetadata) SchemaTableName(com.facebook.presto.spi.SchemaTableName) HiveTestUtils.createTestHdfsEnvironment(com.facebook.presto.hive.HiveTestUtils.createTestHdfsEnvironment) TestingNodeManager(com.facebook.presto.testing.TestingNodeManager) GroupByHashPageIndexerFactory(com.facebook.presto.GroupByHashPageIndexerFactory) CacheConfig(com.facebook.presto.cache.CacheConfig)

Example 30 with CacheConfig

use of com.facebook.presto.cache.CacheConfig in project presto by prestodb.

The following example is from the class TestHivePageSourceProvider, method testUseRecordReaderWithInputFormatAnnotationAndCustomSplit.

@Test
public void testUseRecordReaderWithInputFormatAnnotationAndCustomSplit() {
    // Hudi realtime layout: Parquet serde paired with HoodieParquetInputFormat.
    StorageFormat storageFormat = StorageFormat.create(ParquetHiveSerDe.class.getName(), HoodieParquetInputFormat.class.getName(), "");
    Storage storage = new Storage(storageFormat, "test", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    // Custom split metadata describing a HoodieRealtimeFileSplit with one delta log file.
    Map<String, String> customSplitInfo = ImmutableMap.of(
            CUSTOM_FILE_SPLIT_CLASS_KEY, HoodieRealtimeFileSplit.class.getName(),
            HUDI_BASEPATH_KEY, "/test/file.parquet",
            HUDI_DELTA_FILEPATHS_KEY, "/test/.file_100.log",
            HUDI_MAX_COMMIT_TIME_KEY, "100");
    HiveRecordCursorProvider recordCursorProvider = new MockHiveRecordCursorProvider();
    HiveBatchPageSourceFactory hiveBatchPageSourceFactory = new MockHiveBatchPageSourceFactory();
    // Session with the record-page-source-for-custom-split flag enabled.
    HiveClientConfig hiveClientConfig = new HiveClientConfig().setUseRecordPageSourceForCustomSplit(true);
    TestingConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(recordCursorProvider),
            ImmutableSet.of(hiveBatchPageSourceFactory),
            new Configuration(),
            session,
            new Path("/test/"),
            OptionalInt.empty(),
            0,
            100,
            200,
            Instant.now().toEpochMilli(),
            storage,
            TupleDomain.none(),
            ImmutableList.of(),
            ImmutableMap.of(),
            ImmutableList.of(),
            DateTimeZone.UTC,
            new TestingTypeManager(),
            new SchemaTableName("test", "test"),
            ImmutableList.of(),
            ImmutableList.of(),
            ImmutableMap.of(),
            0,
            TableToPartitionMapping.empty(),
            Optional.empty(),
            false,
            null,
            null,
            false,
            null,
            Optional.empty(),
            customSplitInfo);
    // With the flag on and a custom split present, the record-reader path must win.
    assertTrue(pageSource.isPresent());
    assertTrue(pageSource.get() instanceof RecordPageSource);
}
Also used : HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Path(org.apache.hadoop.fs.Path) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Configuration(org.apache.hadoop.conf.Configuration) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) RecordPageSource(com.facebook.presto.spi.RecordPageSource) Storage(com.facebook.presto.hive.metastore.Storage) CacheConfig(com.facebook.presto.cache.CacheConfig) TestingTypeManager(com.facebook.presto.common.type.TestingTypeManager) Test(org.testng.annotations.Test)

Aggregations

CacheConfig (com.facebook.presto.cache.CacheConfig)45 Test (org.testng.annotations.Test)33 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)31 ConnectorSession (com.facebook.presto.spi.ConnectorSession)18 Configuration (org.apache.hadoop.conf.Configuration)12 SchemaTableName (com.facebook.presto.spi.SchemaTableName)9 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)8 HiveClientConfig (com.facebook.presto.hive.HiveClientConfig)7 HiveSessionProperties (com.facebook.presto.hive.HiveSessionProperties)7 OrcFileWriterConfig (com.facebook.presto.hive.OrcFileWriterConfig)7 ParquetFileWriterConfig (com.facebook.presto.hive.ParquetFileWriterConfig)7 TestingCacheUtils.stressTest (com.facebook.presto.cache.TestingCacheUtils.stressTest)6 Storage (com.facebook.presto.hive.metastore.Storage)6 File (java.io.File)6 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)5 OutputStreamDataSinkFactory (com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory)4 RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory)4 DataSize (io.airlift.units.DataSize)4 BeforeClass (org.testng.annotations.BeforeClass)4 CounterStat (com.facebook.airlift.stats.CounterStat)3