Search in sources :

Example 6 with SmallFile

use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.

the class TestBucketAssigner method testWriteProfileReload.

/**
 * Checks that a {@code WriteProfile} keeps serving its current view of small files and
 * completed instants until {@code reload} is invoked, and that each reload exposes the
 * most recently completed instant.
 */
@Test
public void testWriteProfileReload() throws Exception {
    WriteProfile profile = new WriteProfile(writeConfig, context);

    // Before anything is written the partition has no small files.
    List<SmallFile> beforeAnyWrite = profile.getSmallFiles("par1");
    assertTrue(beforeAnyWrite.isEmpty(), "Should have no small files");

    // First write: the profile must not see the new instant until it is reloaded.
    TestData.writeData(TestData.DATA_SET_INSERT, conf);
    assertNull(getLastCompleteInstant(profile));

    profile.reload(1);
    String firstInstant = getLastCompleteInstant(profile);
    assertNotNull(firstInstant);
    List<SmallFile> afterFirstReload = profile.getSmallFiles("par1");
    assertThat("Should have 1 small file", afterFirstReload.size(), is(1));
    assertThat(
        "Small file should have same timestamp as last complete instant",
        afterFirstReload.get(0).location.getInstantTime(),
        is(firstInstant));

    // Second write without a reload: the small file still points at the first instant.
    TestData.writeData(TestData.DATA_SET_INSERT, conf);
    List<SmallFile> withoutReload = profile.getSmallFiles("par1");
    assertThat("Should have 1 small file", withoutReload.size(), is(1));
    assertThat(
        "Non-reloaded write profile has the same base file view as before",
        withoutReload.get(0).location.getInstantTime(),
        is(firstInstant));

    // Reloading again moves the profile forward to the second instant.
    profile.reload(2);
    String secondInstant = getLastCompleteInstant(profile);
    assertNotEquals(secondInstant, firstInstant, "Should have new complete instant");
    List<SmallFile> afterSecondReload = profile.getSmallFiles("par1");
    assertThat("Should have 1 small file", afterSecondReload.size(), is(1));
    assertThat(
        "Small file should have same timestamp as last complete instant",
        afterSecondReload.get(0).location.getInstantTime(),
        is(secondInstant));
}
Also used : SmallFile(org.apache.hudi.table.action.commit.SmallFile) WriteProfile(org.apache.hudi.sink.partitioner.profile.WriteProfile) Test(org.junit.jupiter.api.Test)

Example 7 with SmallFile

use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.

the class TestBucketAssigner method testSmallFilesOfThisTask.

/**
 * Tests that the file ids generated by a task are eventually shuffled back to that same task,
 * i.e. small files whose ids came from {@code createFileIdOfThisTask()} are all retained by
 * {@code smallFilesOfThisTask(...)} on the assigner that generated them.
 */
@Test
void testSmallFilesOfThisTask() {
    MockBucketAssigner mockBucketAssigner1 = new MockBucketAssigner(context, writeConfig);
    String fileId1 = mockBucketAssigner1.createFileIdOfThisTask();
    SmallFile smallFile1 = new SmallFile();
    smallFile1.location = new HoodieRecordLocation("t0", fileId1);
    smallFile1.sizeBytes = 123;
    List<SmallFile> smallFiles1 = mockBucketAssigner1.smallFilesOfThisTask(Collections.singletonList(smallFile1));
    assertThat(smallFiles1.size(), is(1));
    // modify the parallelism and test again
    // NOTE(review): the two leading ints are presumably (taskId, numTasks) — confirm against MockBucketAssigner.
    MockBucketAssigner mockBucketAssigner2 = new MockBucketAssigner(123, 200, context, writeConfig, Collections.emptyMap());
    String fileId2 = mockBucketAssigner2.createFileIdOfThisTask();
    SmallFile smallFile2 = new SmallFile();
    smallFile2.location = new HoodieRecordLocation("t0", fileId2);
    smallFile2.sizeBytes = 123;
    String fileId3 = mockBucketAssigner2.createFileIdOfThisTask();
    SmallFile smallFile3 = new SmallFile();
    smallFile3.location = new HoodieRecordLocation("t0", fileId3);
    smallFile3.sizeBytes = 456;
    // Filter with the assigner that generated the ids; filtering with mockBucketAssigner1
    // would leave the reconfigured assigner's filter unexercised.
    List<SmallFile> smallFiles2 = mockBucketAssigner2.smallFilesOfThisTask(Arrays.asList(smallFile2, smallFile3));
    assertThat(smallFiles2.size(), is(2));
}
Also used : SmallFile(org.apache.hudi.table.action.commit.SmallFile) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) Test(org.junit.jupiter.api.Test)

Example 8 with SmallFile

use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.

the class TestBucketAssigner method testUpdateAndInsertWithSmallFiles.

/**
 * Interleaves updates and inserts on an assigner that was seeded with small files for
 * {@code par1} and {@code par2}, and checks which existing file each insert is routed to.
 */
@Test
public void testUpdateAndInsertWithSmallFiles() {
    SmallFile roomyFile = new SmallFile();
    roomyFile.sizeBytes = 12;
    roomyFile.location = new HoodieRecordLocation("t0", "f0");

    SmallFile fullFile = new SmallFile();
    // sized so that no space is left to append new records to this bucket
    fullFile.sizeBytes = 122879;
    fullFile.location = new HoodieRecordLocation("t0", "f1");

    SmallFile otherPartitionFile = new SmallFile();
    otherPartitionFile.sizeBytes = 56;
    otherPartitionFile.location = new HoodieRecordLocation("t0", "f2");

    Map<String, List<SmallFile>> smallFilesByPartition = new HashMap<>();
    smallFilesByPartition.put("par1", Arrays.asList(roomyFile, fullFile));
    smallFilesByPartition.put("par2", Collections.singletonList(otherPartitionFile));

    MockBucketAssigner assigner = new MockBucketAssigner(context, writeConfig, smallFilesByPartition);

    // An update on f0 followed by an insert into the same partition.
    assigner.addUpdate("par1", "f0");
    BucketInfo firstInsert = assigner.addInsert("par1");
    assertBucketEquals(firstInsert, "par1", BucketType.UPDATE, "f0");

    // Further inserts are expected to keep landing in f0.
    assigner.addInsert("par1");
    BucketInfo repeatedInsert = assigner.addInsert("par1");
    assertBucketEquals(repeatedInsert, "par1", BucketType.UPDATE, "f0");

    // An update that names f2 under par1 is expected not to redirect par1 inserts.
    assigner.addUpdate("par1", "f2");
    assigner.addInsert("par1");
    BucketInfo insertAfterForeignUpdate = assigner.addInsert("par1");
    assertBucketEquals(insertAfterForeignUpdate, "par1", BucketType.UPDATE, "f0");

    // par2 inserts are expected to go to par2's own small file, f2.
    assigner.addUpdate("par2", "f0");
    assigner.addInsert("par2");
    BucketInfo par2Insert = assigner.addInsert("par2");
    assertBucketEquals(par2Insert, "par2", BucketType.UPDATE, "f2");
}
Also used : HashMap(java.util.HashMap) SmallFile(org.apache.hudi.table.action.commit.SmallFile) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) List(java.util.List) BucketInfo(org.apache.hudi.table.action.commit.BucketInfo) Test(org.junit.jupiter.api.Test)

Example 9 with SmallFile

use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.

the class TestBucketAssigner method testInsertWithPartialSmallFiles.

/**
 * Checks that an assigner only uses a subset of a partition's small files: two assigners
 * built with different leading constructor arguments over the same small-file map are
 * expected to route {@code par1} inserts to different files, while a partition without
 * small files yields plain INSERT buckets.
 */
@Test
public void testInsertWithPartialSmallFiles() {
    SmallFile fileZero = new SmallFile();
    fileZero.sizeBytes = 12;
    fileZero.location = new HoodieRecordLocation("t0", "f0");

    SmallFile fileOne = new SmallFile();
    // sized so that no space is left to append new records to this bucket
    fileOne.sizeBytes = 122879;
    fileOne.location = new HoodieRecordLocation("t0", "f1");

    SmallFile fileTwo = new SmallFile();
    fileTwo.sizeBytes = 56;
    fileTwo.location = new HoodieRecordLocation("t0", "f2");

    Map<String, List<SmallFile>> smallFilesByPartition = new HashMap<>();
    smallFilesByPartition.put("par1", Arrays.asList(fileZero, fileOne, fileTwo));

    // NOTE(review): the leading ints are presumably (taskId, numTasks) — confirm against MockBucketAssigner.
    MockBucketAssigner firstAssigner = new MockBucketAssigner(0, 2, context, writeConfig, smallFilesByPartition);

    BucketInfo firstPar1 = firstAssigner.addInsert("par1");
    assertBucketEquals(firstPar1, "par1", BucketType.UPDATE, "f2");

    firstAssigner.addInsert("par1");
    BucketInfo firstPar1Again = firstAssigner.addInsert("par1");
    assertBucketEquals(firstPar1Again, "par1", BucketType.UPDATE, "f2");

    // par3 has no entry in the small-file map, so new buckets are expected.
    BucketInfo firstPar3 = firstAssigner.addInsert("par3");
    assertBucketEquals(firstPar3, "par3", BucketType.INSERT);

    BucketInfo firstPar3Again = firstAssigner.addInsert("par3");
    assertBucketEquals(firstPar3Again, "par3", BucketType.INSERT);

    // Same scenario on the second assigner, which is expected to pick f0 instead.
    MockBucketAssigner secondAssigner = new MockBucketAssigner(1, 2, context, writeConfig, smallFilesByPartition);

    BucketInfo secondPar1 = secondAssigner.addInsert("par1");
    assertBucketEquals(secondPar1, "par1", BucketType.UPDATE, "f0");

    secondAssigner.addInsert("par1");
    BucketInfo secondPar1Again = secondAssigner.addInsert("par1");
    assertBucketEquals(secondPar1Again, "par1", BucketType.UPDATE, "f0");

    BucketInfo secondPar3 = secondAssigner.addInsert("par3");
    assertBucketEquals(secondPar3, "par3", BucketType.INSERT);

    BucketInfo secondPar3Again = secondAssigner.addInsert("par3");
    assertBucketEquals(secondPar3Again, "par3", BucketType.INSERT);
}
Also used : HashMap(java.util.HashMap) SmallFile(org.apache.hudi.table.action.commit.SmallFile) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) List(java.util.List) BucketInfo(org.apache.hudi.table.action.commit.BucketInfo) Test(org.junit.jupiter.api.Test)

Aggregations

SmallFile (org.apache.hudi.table.action.commit.SmallFile)9 HoodieRecordLocation (org.apache.hudi.common.model.HoodieRecordLocation)8 Test (org.junit.jupiter.api.Test)6 HashMap (java.util.HashMap)4 List (java.util.List)4 BucketInfo (org.apache.hudi.table.action.commit.BucketInfo)4 ArrayList (java.util.ArrayList)3 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)3 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)3 FileSlice (org.apache.hudi.common.model.FileSlice)2 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)1 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)1 WriteProfile (org.apache.hudi.sink.partitioner.profile.WriteProfile)1