
Example 1 with HoodieTableMetadataWriter

Use of org.apache.hudi.metadata.HoodieTableMetadataWriter in project hudi by apache.

From the class TestCleaner, method testCleaningWithZeroPartitionPaths.

/**
 * Test cleaner stats when there are no partition paths.
 */
@Test
public void testCleaningWithZeroPartitionPaths() throws Exception {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
            .retainCommits(2)
            .build())
        .build();
    // Make a commit, although there are no partitionPaths.
    // Example use-case of this is when a client wants to create a table
    // with just some commit metadata, but no data/partitionPaths.
    HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
    HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
    testTable.doWriteOperation("001", WriteOperationType.INSERT, Collections.emptyList(), 1);
    metaClient = HoodieTableMetaClient.reload(metaClient);
    List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
    assertTrue(hoodieCleanStatsOne.isEmpty(), "HoodieCleanStats should be empty for a table with empty partitionPaths");
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
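
All of the examples on this page share the same setup: build a HoodieWriteConfig, create a HoodieTableMetadataWriter with SparkHoodieBackedTableMetadataWriter.create, and wrap it in HoodieMetadataTestTable so that test writes are also reflected in the metadata table. A minimal sketch of that pattern, assuming the test harness provides basePath, hadoopConf, context and metaClient (as it does in the tests shown here):

// Minimal setup sketch; basePath, hadoopConf, context and metaClient come from the test base class.
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build())
    .build();
// The metadata writer keeps the Hudi metadata table in sync with the writes issued below.
HoodieTableMetadataWriter metadataWriter =
    SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
// HoodieMetadataTestTable routes every test write through the metadata writer as well.
HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
testTable.doWriteOperation("001", WriteOperationType.INSERT, Collections.emptyList(), 1);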

Example 2 with HoodieTableMetadataWriter

Use of org.apache.hudi.metadata.HoodieTableMetadataWriter in project hudi by apache.

From the class TestCleaner, method testKeepLatestFileVersions.

/**
 * Test Hudi COW Table Cleaner - Keep the latest file versions policy.
 */
@ParameterizedTest
@ValueSource(booleans = { false, true })
public void testKeepLatestFileVersions(Boolean enableBootstrapSourceClean) throws Exception {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS)
            .retainFileVersions(1)
            .build())
        .build();
    HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
    HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
    final String p0 = "2020/01/01";
    final String p1 = "2020/01/02";
    final Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;
    // make 1 commit, with 1 file per partition
    final String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p0).get(0).getFileId() : UUID.randomUUID().toString();
    final String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p1).get(0).getFileId() : UUID.randomUUID().toString();
    Map<String, List<Pair<String, Integer>>> c1PartitionToFilesNameLengthMap = new HashMap<>();
    c1PartitionToFilesNameLengthMap.put(p0, Collections.singletonList(Pair.of(file1P0C0, 100)));
    c1PartitionToFilesNameLengthMap.put(p1, Collections.singletonList(Pair.of(file1P1C0, 200)));
    testTable.doWriteOperation("00000000000001", WriteOperationType.INSERT, Arrays.asList(p0, p1), c1PartitionToFilesNameLengthMap, false, false);
    List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
    assertEquals(0, hoodieCleanStatsOne.size(), "Must not clean any files");
    assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
    assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
    // make next commit, with 1 insert & 1 update per partition
    final String file2P0C1 = UUID.randomUUID().toString();
    final String file2P1C1 = UUID.randomUUID().toString();
    Map<String, List<Pair<String, Integer>>> c2PartitionToFilesNameLengthMap = new HashMap<>();
    c2PartitionToFilesNameLengthMap.put(p0, Arrays.asList(Pair.of(file1P0C0, 101), Pair.of(file2P0C1, 100)));
    c2PartitionToFilesNameLengthMap.put(p1, Arrays.asList(Pair.of(file1P1C0, 201), Pair.of(file2P1C1, 200)));
    testTable.doWriteOperation("00000000000002", WriteOperationType.UPSERT, Collections.emptyList(), c2PartitionToFilesNameLengthMap, false, false);
    // enableBootstrapSourceClean would delete the bootstrap base file at the same time
    List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, 1);
    HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsTwo, p0);
    assertEquals(enableBootstrapSourceClean ? 2 : 1, cleanStat.getSuccessDeleteFiles().size() + (cleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0 : cleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least 1 file");
    if (enableBootstrapSourceClean) {
        HoodieFileStatus fstatus = bootstrapMapping.get(p0).get(0).getBootstrapFileStatus();
        // This ensures full path is recorded in metadata.
        assertTrue(cleanStat.getSuccessDeleteBootstrapBaseFiles().contains(fstatus.getPath().getUri()), "Successful delete files were " + cleanStat.getSuccessDeleteBootstrapBaseFiles() + " but did not contain " + fstatus.getPath().getUri());
        assertFalse(Files.exists(Paths.get(bootstrapMapping.get(p0).get(0).getBootstrapFileStatus().getPath().getUri())));
    }
    cleanStat = getCleanStat(hoodieCleanStatsTwo, p1);
    assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
    assertTrue(testTable.baseFileExists(p1, "00000000000002", file2P1C1));
    assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
    assertFalse(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
    assertEquals(enableBootstrapSourceClean ? 2 : 1, cleanStat.getSuccessDeleteFiles().size() + (cleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0 : cleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least 1 file");
    if (enableBootstrapSourceClean) {
        HoodieFileStatus fstatus = bootstrapMapping.get(p1).get(0).getBootstrapFileStatus();
        // This ensures full path is recorded in metadata.
        assertTrue(cleanStat.getSuccessDeleteBootstrapBaseFiles().contains(fstatus.getPath().getUri()), "Successful delete files were " + cleanStat.getSuccessDeleteBootstrapBaseFiles() + " but did not contain " + fstatus.getPath().getUri());
        assertFalse(Files.exists(Paths.get(bootstrapMapping.get(p1).get(0).getBootstrapFileStatus().getPath().getUri())));
    }
    // make next commit, with 2 updates to existing files, and 1 insert
    final String file3P0C2 = UUID.randomUUID().toString();
    Map<String, List<Pair<String, Integer>>> c3PartitionToFilesNameLengthMap = new HashMap<>();
    c3PartitionToFilesNameLengthMap.put(p0, Arrays.asList(Pair.of(file1P0C0, 102), Pair.of(file2P0C1, 101), Pair.of(file3P0C2, 100)));
    testTable.doWriteOperation("00000000000003", WriteOperationType.UPSERT, Collections.emptyList(), c3PartitionToFilesNameLengthMap, false, false);
    List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, 3);
    assertEquals(2, getCleanStat(hoodieCleanStatsThree, p0).getSuccessDeleteFiles().size(), "Must clean two files");
    assertFalse(testTable.baseFileExists(p0, "00000000000002", file1P0C0));
    assertFalse(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
    assertTrue(testTable.baseFileExists(p0, "00000000000003", file3P0C2));
    // No cleaning on partially written file, with no commit.
    testTable.forCommit("00000000000004").withBaseFilesInPartition(p0, file3P0C2);
    List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config);
    assertEquals(0, hoodieCleanStatsFour.size(), "Must not clean any files");
    assertTrue(testTable.baseFileExists(p0, "00000000000003", file3P0C2));
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) HashMap(java.util.HashMap) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ArrayList(java.util.ArrayList) List(java.util.List) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
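
The two cleaner tests above differ mainly in the cleaning policy they configure. A hedged side-by-side sketch of the two policies, using only the builder calls that appear in Examples 1 and 2 (the retention values are the ones used there and are illustrative):

// KEEP_LATEST_COMMITS (Example 1): keep the files needed by the most recent commits.
HoodieCompactionConfig keepLatestCommits = HoodieCompactionConfig.newBuilder()
    .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
    .retainCommits(2)
    .build();

// KEEP_LATEST_FILE_VERSIONS (Example 2): keep only the newest N versions of each file.
HoodieCompactionConfig keepLatestVersions = HoodieCompactionConfig.newBuilder()
    .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS)
    .retainFileVersions(1)
    .build();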

Example 3 with HoodieTableMetadataWriter

Use of org.apache.hudi.metadata.HoodieTableMetadataWriter in project hudi by apache.

From the class TestHoodieIndex, method testSimpleGlobalIndexTagLocationWhenShouldUpdatePartitionPath.

@Test
public void testSimpleGlobalIndexTagLocationWhenShouldUpdatePartitionPath() throws Exception {
    setUp(IndexType.GLOBAL_SIMPLE, true, true);
    config = getConfigBuilder()
        .withIndexConfig(HoodieIndexConfig.newBuilder()
            .withIndexType(indexType)
            .withGlobalSimpleIndexUpdatePartitionPath(true)
            .withBloomIndexUpdatePartitionPath(true)
            .build())
        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
            .enable(true)
            .withMetadataIndexBloomFilter(true)
            .withMetadataIndexColumnStats(true)
            .build())
        .build();
    writeClient = getHoodieWriteClient(config);
    index = writeClient.getIndex();
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(writeClient.getEngineContext().getHadoopConf().get(), config, writeClient.getEngineContext());
    HoodieSparkWriteableTestTable testTable = HoodieSparkWriteableTestTable.of(hoodieTable.getMetaClient(), SCHEMA, metadataWriter);
    final String p1 = "2016/01/31";
    final String p2 = "2016/02/28";
    // Create the original partition, and put a record, along with the meta file
    // "2016/01/31": 1 file (1_0_20160131101010.parquet)
    // this record will be saved in the table and will be tagged to an empty record
    RawTripTestPayload originalPayload = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
    HoodieRecord originalRecord = new HoodieAvroRecord(new HoodieKey(originalPayload.getRowKey(), originalPayload.getPartitionPath()), originalPayload);
    /*
    This record has the same record key as originalRecord, but a different time and hence a different partition.
    Because update-partition-path is enabled (both withGlobalSimpleIndexUpdatePartitionPath and
    withBloomIndexUpdatePartitionPath are true), the global index should
    - tag the original partition of originalRecord with an empty record (marking it for deletion), and
    - tag incomingRecord with its new partition
    */
    RawTripTestPayload incomingPayload = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-02-28T03:16:41.415Z\",\"number\":12}");
    HoodieRecord incomingRecord = new HoodieAvroRecord(new HoodieKey(incomingPayload.getRowKey(), incomingPayload.getPartitionPath()), incomingPayload);
    /*
    This record has the same record key as originalRecord and the same partition.
    Even with update-partition-path enabled, the global index should simply tag the original partition.
    */
    RawTripTestPayload incomingPayloadSamePartition = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T04:16:41.415Z\",\"number\":15}");
    HoodieRecord incomingRecordSamePartition = new HoodieAvroRecord(new HoodieKey(incomingPayloadSamePartition.getRowKey(), incomingPayloadSamePartition.getPartitionPath()), incomingPayloadSamePartition);
    final String file1P1C0 = UUID.randomUUID().toString();
    Map<String, List<Pair<String, Integer>>> c1PartitionToFilesNameLengthMap = new HashMap<>();
    // We have some records to be tagged (two different partitions)
    Path baseFilePath = testTable.forCommit("1000").withInserts(p1, file1P1C0, Collections.singletonList(originalRecord));
    long baseFileLength = fs.getFileStatus(baseFilePath).getLen();
    c1PartitionToFilesNameLengthMap.put(p1, Collections.singletonList(Pair.of(file1P1C0, Integer.valueOf((int) baseFileLength))));
    testTable.doWriteOperation("1000", WriteOperationType.INSERT, Arrays.asList(p1), c1PartitionToFilesNameLengthMap, false, false);
    // We have some records to be tagged (two different partitions)
    testTable.withInserts(p1, file1P1C0, originalRecord);
    // test against incoming record with a different partition
    JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Collections.singletonList(incomingRecord));
    JavaRDD<HoodieRecord> taggedRecordRDD = tagLocation(index, recordRDD, hoodieTable);
    assertEquals(2, taggedRecordRDD.count());
    for (HoodieRecord record : taggedRecordRDD.collect()) {
        switch(record.getPartitionPath()) {
            case p1:
                assertEquals("000", record.getRecordKey());
                assertTrue(record.getData() instanceof EmptyHoodieRecordPayload);
                break;
            case p2:
                assertEquals("000", record.getRecordKey());
                assertEquals(incomingPayload.getJsonData(), ((RawTripTestPayload) record.getData()).getJsonData());
                break;
            default:
                fail(String.format("Should not get partition path: %s", record.getPartitionPath()));
        }
    }
    // test against incoming record with the same partition
    JavaRDD<HoodieRecord> recordRDDSamePartition = jsc.parallelize(Collections.singletonList(incomingRecordSamePartition));
    JavaRDD<HoodieRecord> taggedRecordRDDSamePartition = tagLocation(index, recordRDDSamePartition, hoodieTable);
    assertEquals(1, taggedRecordRDDSamePartition.count());
    HoodieRecord record = taggedRecordRDDSamePartition.first();
    assertEquals("000", record.getRecordKey());
    assertEquals(p1, record.getPartitionPath());
    assertEquals(incomingPayloadSamePartition.getJsonData(), ((RawTripTestPayload) record.getData()).getJsonData());
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieKey(org.apache.hudi.common.model.HoodieKey) EmptyHoodieRecordPayload(org.apache.hudi.common.model.EmptyHoodieRecordPayload) HoodieSparkWriteableTestTable(org.apache.hudi.testutils.HoodieSparkWriteableTestTable) List(java.util.List) ArrayList(java.util.ArrayList) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
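
The behaviour exercised in Example 3 is driven by the index configuration: with update-partition-path enabled, a record whose key already exists under a different partition is tagged for deletion in the old partition and written to the new one. A minimal sketch of that configuration, reusing only builder calls shown above (GLOBAL_SIMPLE matches the index type passed to setUp in this test):

// Global SIMPLE index that migrates a record when its partition path changes.
// If the flag were false, the incoming record would instead be tagged back to the
// partition where the key was originally written.
HoodieIndexConfig indexConfig = HoodieIndexConfig.newBuilder()
    .withIndexType(HoodieIndex.IndexType.GLOBAL_SIMPLE)
    .withGlobalSimpleIndexUpdatePartitionPath(true)
    .build();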

Example 4 with HoodieTableMetadataWriter

Use of org.apache.hudi.metadata.HoodieTableMetadataWriter in project hudi by apache.

From the class TestClientRollback, method testRollbackCommit.

/**
 * Test Cases for effects of rolling back completed/inflight commits.
 */
@Test
public void testRollbackCommit() throws Exception {
    // Let's create some commit files and base files
    final String p1 = "2016/05/01";
    final String p2 = "2016/05/02";
    final String p3 = "2016/05/06";
    final String commitTime1 = "20160501010101";
    final String commitTime2 = "20160502020601";
    final String commitTime3 = "20160506030611";
    Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {

        {
            put(p1, "id11");
            put(p2, "id12");
            put(p3, "id13");
        }
    };
    Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {

        {
            put(p1, "id21");
            put(p2, "id22");
            put(p3, "id23");
        }
    };
    Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {

        {
            put(p1, "id31");
            put(p2, "id32");
            put(p3, "id33");
        }
    };
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withRollbackUsingMarkers(false)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
            .build())
        .withIndexConfig(HoodieIndexConfig.newBuilder()
            .withIndexType(HoodieIndex.IndexType.INMEMORY)
            .build())
        .build();
    HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
    HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap1 = new HashMap<>();
    partitionAndFileId1.forEach((k, v) -> partitionToFilesNameLengthMap1.put(k, Collections.singletonList(Pair.of(v, 100))));
    testTable.doWriteOperation(commitTime1, WriteOperationType.INSERT, Arrays.asList(p1, p2, p3), partitionToFilesNameLengthMap1, false, false);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap2 = new HashMap<>();
    partitionAndFileId2.forEach((k, v) -> partitionToFilesNameLengthMap2.put(k, Collections.singletonList(Pair.of(v, 200))));
    testTable.doWriteOperation(commitTime2, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap2, false, false);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap3 = new HashMap<>();
    partitionAndFileId3.forEach((k, v) -> partitionToFilesNameLengthMap3.put(k, Collections.singletonList(Pair.of(v, 300))));
    testTable.doWriteOperation(commitTime3, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap3, false, true);
    try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
        // Rollback commit3
        client.rollback(commitTime3);
        assertFalse(testTable.inflightCommitExists(commitTime3));
        assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
        assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // simulate partial failure, where .inflight was not deleted, but data files were.
        testTable.addInflightCommit(commitTime3);
        client.rollback(commitTime3);
        assertFalse(testTable.inflightCommitExists(commitTime3));
        assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // Rollback commit2
        client.rollback(commitTime2);
        assertFalse(testTable.commitExists(commitTime2));
        assertFalse(testTable.inflightCommitExists(commitTime2));
        assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // simulate partial failure, where only .commit => .inflight renaming succeeded, leaving a
        // .inflight commit and a bunch of data files around.
        testTable.addInflightCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2);
        client.rollback(commitTime2);
        assertFalse(testTable.commitExists(commitTime2));
        assertFalse(testTable.inflightCommitExists(commitTime2));
        assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        // Let's rollback commit1, Check results
        client.rollback(commitTime1);
        assertFalse(testTable.commitExists(commitTime1));
        assertFalse(testTable.inflightCommitExists(commitTime1));
        assertFalse(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
    }
}
Also used : HashMap(java.util.HashMap) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Test(org.junit.jupiter.api.Test)

Example 5 with HoodieTableMetadataWriter

Use of org.apache.hudi.metadata.HoodieTableMetadataWriter in project hudi by apache.

From the class TestClientRollback, method testAutoRollbackInflightCommit.

/**
 * Test auto-rollback of commits which are in flight.
 */
@Test
public void testAutoRollbackInflightCommit() throws Exception {
    // Let's create some commit files and base files
    final String p1 = "2016/05/01";
    final String p2 = "2016/05/02";
    final String p3 = "2016/05/06";
    final String commitTime1 = "20160501010101";
    final String commitTime2 = "20160502020601";
    final String commitTime3 = "20160506030611";
    Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {

        {
            put(p1, "id11");
            put(p2, "id12");
            put(p3, "id13");
        }
    };
    Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {

        {
            put(p1, "id21");
            put(p2, "id22");
            put(p3, "id23");
        }
    };
    Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {

        {
            put(p1, "id31");
            put(p2, "id32");
            put(p3, "id33");
        }
    };
    // Set Failed Writes rollback to LAZY
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withIndexConfig(HoodieIndexConfig.newBuilder()
            .withIndexType(HoodieIndex.IndexType.INMEMORY)
            .build())
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
            .build())
        .build();
    HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
    HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap1 = new HashMap<>();
    partitionAndFileId1.forEach((k, v) -> partitionToFilesNameLengthMap1.put(k, Collections.singletonList(Pair.of(v, 100))));
    testTable.doWriteOperation(commitTime1, WriteOperationType.INSERT, Arrays.asList(p1, p2, p3), partitionToFilesNameLengthMap1, false, false);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap2 = new HashMap<>();
    partitionAndFileId2.forEach((k, v) -> partitionToFilesNameLengthMap2.put(k, Collections.singletonList(Pair.of(v, 200))));
    testTable.doWriteOperation(commitTime2, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap2, false, true);
    Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap3 = new HashMap<>();
    partitionAndFileId3.forEach((k, v) -> partitionToFilesNameLengthMap3.put(k, Collections.singletonList(Pair.of(v, 300))));
    testTable.doWriteOperation(commitTime3, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap3, false, true);
    final String commitTime4 = "20160506030621";
    try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
        client.startCommitWithTime(commitTime4);
        // Check results, nothing changed
        assertTrue(testTable.commitExists(commitTime1));
        assertTrue(testTable.inflightCommitExists(commitTime2));
        assertTrue(testTable.inflightCommitExists(commitTime3));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertTrue(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
    }
    // Switch to EAGER failed-writes rollback (the default, since no cleaning policy is configured here)
    config = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withRollbackUsingMarkers(false)
        .withIndexConfig(HoodieIndexConfig.newBuilder()
            .withIndexType(HoodieIndex.IndexType.INMEMORY)
            .build())
        .build();
    final String commitTime5 = "20160506030631";
    try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
        client.startCommitWithTime(commitTime5);
        assertTrue(testTable.commitExists(commitTime1));
        assertFalse(testTable.inflightCommitExists(commitTime2));
        assertFalse(testTable.inflightCommitExists(commitTime3));
        assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
        assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
        assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
    }
}
Also used : HashMap(java.util.HashMap) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Test(org.junit.jupiter.api.Test)
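
Examples 4 and 5 hinge on the failed-writes cleaning policy: with LAZY, inflight commits from earlier writers are left in place until they are explicitly rolled back or cleaned, whereas the second config in Example 5 omits the policy and relies on the default behaviour, which the test's comments describe as eager rollback when the next commit starts. A hedged sketch of the LAZY configuration, using only calls from the examples:

// LAZY failed-writes cleaning: inflight commits survive startCommitWithTime(), which is
// why Example 5 still sees commitTime2/commitTime3 inflight under this config.
HoodieWriteConfig lazyConfig = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
        .build())
    .withIndexConfig(HoodieIndexConfig.newBuilder()
        .withIndexType(HoodieIndex.IndexType.INMEMORY)
        .build())
    .build();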

Aggregations

HoodieTableMetadataWriter (org.apache.hudi.metadata.HoodieTableMetadataWriter): 7
HashMap (java.util.HashMap): 6
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 6
Test (org.junit.jupiter.api.Test): 6
List (java.util.List): 5
HoodieTestTable (org.apache.hudi.common.testutils.HoodieTestTable): 5
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 5
HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat): 3
ArrayList (java.util.ArrayList): 2
Path (org.apache.hadoop.fs.Path): 2
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 2
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 2
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 2
IOException (java.io.IOException): 1
Arrays (java.util.Arrays): 1
Map (java.util.Map): 1
Properties (java.util.Properties): 1
Set (java.util.Set): 1
Collectors (java.util.stream.Collectors): 1
Stream (java.util.stream.Stream): 1