Example 11 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

The class TestStatsCommand, method testFileSizeStats.

/**
 * Test case for command 'stats filesizes'.
 */
@Test
public void testFileSizeStats() throws Exception {
    String commit1 = "100";
    String commit2 = "101";
    Map<String, Integer[]> data = new LinkedHashMap<>();
    data.put(commit1, new Integer[] { 100, 120, 150 });
    data.put(commit2, new Integer[] { 200, 180, 250, 300 });
    // Generate base files for each commit across the three partitions
    String partition1 = HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
    String partition2 = HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
    String partition3 = HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH;
    HoodieTestTable testTable = HoodieTestTable.of(HoodieCLI.getTableMetaClient());
    Integer[] data1 = data.get(commit1);
    assertTrue(3 <= data1.length);
    testTable.addCommit(commit1).withBaseFilesInPartition(partition1, data1[0]).withBaseFilesInPartition(partition2, data1[1]).withBaseFilesInPartition(partition3, data1[2]);
    Integer[] data2 = data.get(commit2);
    assertTrue(4 <= data2.length);
    testTable.addCommit(commit2).withBaseFilesInPartition(partition1, data2[0]).withBaseFilesInPartition(partition2, data2[1], data2[2]).withBaseFilesInPartition(partition3, data2[3]);
    CommandResult cr = shell().executeCommand("stats filesizes");
    assertTrue(cr.isSuccess());
    Histogram globalHistogram = new Histogram(new UniformReservoir(StatsCommand.MAX_FILES));
    HashMap<String, Histogram> commitHistoMap = new HashMap<>();
    data.forEach((k, v) -> {
        commitHistoMap.put(k, new Histogram(new UniformReservoir(StatsCommand.MAX_FILES)));
        for (int value : v) {
            commitHistoMap.get(k).update(value);
            globalHistogram.update(value);
        }
    });
    // Generate the expected output
    List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, Histogram> entry : commitHistoMap.entrySet()) {
        Snapshot s = entry.getValue().getSnapshot();
        rows.add(new StatsCommand().printFileSizeHistogram(entry.getKey(), s));
    }
    Snapshot s = globalHistogram.getSnapshot();
    rows.add(new StatsCommand().printFileSizeHistogram("ALL", s));
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV);
    String expect = HoodiePrintHelper.print(header, new StatsCommand().getFieldNameToConverterMap(), "", false, -1, false, rows);
    expect = removeNonWordAndStripSpace(expect);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expect, got);
}
Also used : Histogram(com.codahale.metrics.Histogram) TableHeader(org.apache.hudi.cli.TableHeader) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) CommandResult(org.springframework.shell.core.CommandResult) Snapshot(com.codahale.metrics.Snapshot) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) UniformReservoir(com.codahale.metrics.UniformReservoir) Map(java.util.Map) Test(org.junit.jupiter.api.Test)
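The expected rows above are derived from Dropwizard Metrics histograms. Below is a minimal, standalone sketch of that part in isolation, assuming only the com.codahale.metrics classes already used by the test; the reservoir size of 1000 is an arbitrary stand-in for StatsCommand.MAX_FILES.

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;

public class FileSizeHistogramSketch {
    public static void main(String[] args) {
        // One histogram per commit (plus a global one in the test); only one is shown here.
        Histogram histogram = new Histogram(new UniformReservoir(1000));
        for (int fileSize : new int[] { 100, 120, 150 }) {
            histogram.update(fileSize);
        }
        // Snapshot exposes the statistics that 'stats filesizes' prints per row.
        Snapshot snapshot = histogram.getSnapshot();
        System.out.printf("min=%d p10=%.1f p50=%.1f avg=%.1f p95=%.1f max=%d files=%d stddev=%.1f%n",
                snapshot.getMin(), snapshot.getValue(0.1), snapshot.getMedian(), snapshot.getMean(),
                snapshot.get95thPercentile(), snapshot.getMax(), snapshot.size(), snapshot.getStdDev());
    }
}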

Example 12 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

The class TestFileSystemBackedTableMetadata, method testDatePartitionedTableWithAssumeDateIsFalse.

/**
 * Test listing of partitions for date-based partitions with assumeDatePartitioning = false.
 * @throws Exception
 */
@Test
public void testDatePartitionedTableWithAssumeDateIsFalse() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Generate 10 files under each partition
    DATE_PARTITIONS.stream().forEach(p -> {
        try {
            hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p).withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    FileSystemBackedTableMetadata fileSystemBackedTableMetadata = new FileSystemBackedTableMetadata(localEngineContext, new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
    Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
    List<String> fullPartitionPaths = DATE_PARTITIONS.stream().map(p -> basePath + "/" + p).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    for (String p : fullPartitionPaths) {
        Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
    }
}
Also used : IntStream(java.util.stream.IntStream) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) IOException(java.io.IOException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Map(java.util.Map) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Assertions(org.junit.jupiter.api.Assertions) Path(org.apache.hadoop.fs.Path) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) Collections(java.util.Collections)
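This example and the two TestFileSystemBackedTableMetadata examples that follow rely on fields set up by the test fixture (hoodieTestTable, metaClient, basePath, and the partition lists), which is not shown on this page. The sketch below is a hypothetical reconstruction of that fixture; the partition values and the initMetaClient() call are assumptions, not the actual code.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.testutils.HoodieTestTable;
import org.junit.jupiter.api.BeforeEach;

public class TestFileSystemBackedTableMetadataFixtureSketch extends HoodieCommonTestHarness {
    // Hypothetical values; the real test defines its own date-based and multi-level partition lists.
    private static final List<String> DATE_PARTITIONS = Arrays.asList("2019/01/01", "2020/01/02", "2021/03/01");

    private HoodieTestTable hoodieTestTable;

    @BeforeEach
    public void setUp() throws IOException {
        // Assumed: the harness initializes metaClient and basePath for a fresh table.
        initMetaClient();
        hoodieTestTable = HoodieTestTable.of(metaClient);
    }
}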

Example 13 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

The class TestFileSystemBackedTableMetadata, method testDatePartitionedTable.

/**
 * Test listing of partitions for date-based partitions.
 * @throws Exception
 */
@Test
public void testDatePartitionedTable() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Generate 10 files under each partition
    DATE_PARTITIONS.stream().forEach(p -> {
        try {
            hoodieTestTable = hoodieTestTable.withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    FileSystemBackedTableMetadata fileSystemBackedTableMetadata = new FileSystemBackedTableMetadata(localEngineContext, new SerializableConfiguration(metaClient.getHadoopConf()), basePath, true);
    Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
    Assertions.assertEquals(10, fileSystemBackedTableMetadata.getAllFilesInPartition(new Path(basePath + "/" + DATE_PARTITIONS.get(0))).length);
    List<String> fullPartitionPaths = DATE_PARTITIONS.stream().map(p -> basePath + "/" + p).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    for (String p : fullPartitionPaths) {
        Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IntStream(java.util.stream.IntStream) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) IOException(java.io.IOException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Map(java.util.Map) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Assertions(org.junit.jupiter.api.Assertions) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) Collections(java.util.Collections)

Example 14 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

The class TestFileSystemBackedTableMetadata, method testMultiLevelEmptyPartitionTable.

@Test
public void testMultiLevelEmptyPartitionTable() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Create only partition metadata files (no data files) under each partition
    MULTI_LEVEL_PARTITIONS.stream().forEach(p -> {
        try {
            hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    FileSystemBackedTableMetadata fileSystemBackedTableMetadata = new FileSystemBackedTableMetadata(localEngineContext, new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
    Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
    Assertions.assertEquals(0, fileSystemBackedTableMetadata.getAllFilesInPartition(new Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length);
    List<String> fullPartitionPaths = MULTI_LEVEL_PARTITIONS.stream().map(p -> basePath + "/" + p).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    for (String p : fullPartitionPaths) {
        Assertions.assertEquals(0, partitionToFilesMap.get(p).length);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IntStream(java.util.stream.IntStream) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) IOException(java.io.IOException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Map(java.util.Map) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Assertions(org.junit.jupiter.api.Assertions) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) Collections(java.util.Collections)

Example 15 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

The class TestClientRollback, method testRollbackCommit.

/**
 * Test Cases for effects of rolling back completed/inflight commits.
 */
@Test
public void testRollbackCommit() throws Exception {
    // Let's create some commit files and base files
    final String p1 = "2016/05/01";
    final String p2 = "2016/05/02";
    final String p3 = "2016/05/06";
    final String commitTime1 = "20160501010101";
    final String commitTime2 = "20160502020601";
    final String commitTime3 = "20160506030611";
    Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {

        {
            put(p1, "id11");
            put(p2, "id12");
            put(p3, "id13");
        }
    };
    Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {

        {
            put(p1, "id21");
            put(p2, "id22");
            put(p3, "id23");
        }
    };
    Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {

        {
            put(p1, "id31");
            put(p2, "id32");
            put(p3, "id33");
        }
    };
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withRollbackUsingMarkers(false).withCompactionConfig(HoodieCompactionConfig.newBuilder().withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build()).withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();
    HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
    HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap1 = new HashMap<>();
    partitionAndFileId1.forEach((k, v) -> partitionToFilesNameLengthMap1.put(k, Collections.singletonList(Pair.of(v, 100))));
    testTable.doWriteOperation(commitTime1, WriteOperationType.INSERT, Arrays.asList(p1, p2, p3), partitionToFilesNameLengthMap1, false, false);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap2 = new HashMap<>();
    partitionAndFileId2.forEach((k, v) -> partitionToFilesNameLengthMap2.put(k, Collections.singletonList(Pair.of(v, 200))));
    testTable.doWriteOperation(commitTime2, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap2, false, false);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap3 = new HashMap<>();
    partitionAndFileId3.forEach((k, v) -> partitionToFilesNameLengthMap3.put(k, Collections.singletonList(Pair.of(v, 300))));
    testTable.doWriteOperation(commitTime3, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap3, false, true);
    try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
        // Rollback commit3
        client.rollback(commitTime3);
        assertFalse(testTable.inflightCommitExists(commitTime3));
        assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
        assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // simulate partial failure, where .inflight was not deleted, but data files were.
        testTable.addInflightCommit(commitTime3);
        client.rollback(commitTime3);
        assertFalse(testTable.inflightCommitExists(commitTime3));
        assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // Rollback commit2
        client.rollback(commitTime2);
        assertFalse(testTable.commitExists(commitTime2));
        assertFalse(testTable.inflightCommitExists(commitTime2));
        assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // simulate partial failure, where only .commit => .inflight renaming succeeded, leaving a
        // .inflight commit and a bunch of data files around.
        testTable.addInflightCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2);
        client.rollback(commitTime2);
        assertFalse(testTable.commitExists(commitTime2));
        assertFalse(testTable.inflightCommitExists(commitTime2));
        assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // Rollback commit1 and check the results
        client.rollback(commitTime1);
        assertFalse(testTable.commitExists(commitTime1));
        assertFalse(testTable.inflightCommitExists(commitTime1));
        assertFalse(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
    }
}
Also used : HashMap(java.util.HashMap) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Test(org.junit.jupiter.api.Test)
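The three partitionToFilesNameLengthMap conversions in this example repeat the same shape; collapsing them into a small helper keeps the doWriteOperation calls readable. A minimal sketch, assuming Hudi's Pair type at org.apache.hudi.common.util.collection.Pair (that import is not shown in the example above):

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hudi.common.util.collection.Pair;

final class FilesNameLengthMaps {
    // Builds the partition -> [(fileId, length)] map that doWriteOperation expects,
    // from a simple partition -> fileId map and a single file length.
    static Map<String, List<Pair<String, Integer>>> toFilesNameLengthMap(
            Map<String, String> partitionAndFileId, int fileLength) {
        Map<String, List<Pair<String, Integer>>> result = new HashMap<>();
        partitionAndFileId.forEach((partition, fileId) ->
                result.put(partition, Collections.singletonList(Pair.of(fileId, fileLength))));
        return result;
    }
}

With that helper, the first write above would read: testTable.doWriteOperation(commitTime1, WriteOperationType.INSERT, Arrays.asList(p1, p2, p3), FilesNameLengthMaps.toFilesNameLengthMap(partitionAndFileId1, 100), false, false).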

Aggregations

HoodieTestTable (org.apache.hudi.common.testutils.HoodieTestTable): 22 usages
Test (org.junit.jupiter.api.Test): 17 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 14 usages
List (java.util.List): 12 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 11 usages
HashMap (java.util.HashMap): 10 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 10 usages
HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat): 9 usages
Map (java.util.Map): 8 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 8 usages
IOException (java.io.IOException): 7 usages
Arrays (java.util.Arrays): 7 usages
Collections (java.util.Collections): 7 usages
Collectors (java.util.stream.Collectors): 7 usages
Path (org.apache.hadoop.fs.Path): 7 usages
HoodieCommonTestHarness (org.apache.hudi.common.testutils.HoodieCommonTestHarness): 7 usages
HoodieTableMetadataWriter (org.apache.hudi.metadata.HoodieTableMetadataWriter): 7 usages
Assertions (org.junit.jupiter.api.Assertions): 7 usages
BeforeEach (org.junit.jupiter.api.BeforeEach): 7 usages
ArrayList (java.util.ArrayList): 5 usages
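Across all of these examples the recurring pattern is the same: build the table state fluently with HoodieTestTable, then assert against the file system or the metadata reader. A minimal sketch of that pattern, assuming a metaClient from the usual test harness and arbitrary partition and size values:

// Inside a test method that declares 'throws Exception'.
HoodieTestTable testTable = HoodieTestTable.of(metaClient);
testTable.addCommit("100")
        .withPartitionMetaFiles("2016/03/15")
        // two base files with lengths 100 and 200 in that partition
        .withBaseFilesInPartition("2016/03/15", 100, 200);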