Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestHBaseQPSResourceAllocator, method testsDefaultQPSResourceAllocator.
@Test
public void testsDefaultQPSResourceAllocator() {
  HoodieWriteConfig config = getConfig(Option.empty());
  SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
  HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
  // the default allocator should be used when no custom class is configured
  assertEquals(DefaultHBaseQPSResourceAllocator.class.getName(), hBaseIndexQPSResourceAllocator.getClass().getName());
  // the default allocator grants exactly the requested QPS fraction
  assertEquals(config.getHbaseIndexQPSFraction(),
      hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
}
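The getConfig(Option.empty()) helper above is test scaffolding that does not appear in this snippet. Below is a minimal sketch of what it might look like, assuming the Option carries an optional QPS resource allocator class name; the HoodieHBaseIndexConfig builder methods shown (hbaseZkQuorum, hbaseZkPort, hbaseTableName, withQPSResourceAllocatorType) and the ZooKeeper/table values are assumptions, not the actual helper from the Hudi test base.

// Hypothetical sketch of the getConfig(...) helper; values and builder methods are assumptions.
private HoodieWriteConfig getConfig(Option<String> resourceAllocatorClass) {
  HoodieHBaseIndexConfig.Builder hbaseIndexConfig = HoodieHBaseIndexConfig.newBuilder()
      .hbaseZkQuorum("localhost")    // assumed test ZooKeeper quorum
      .hbaseZkPort(2181)             // assumed test ZooKeeper port
      .hbaseTableName("test_table"); // assumed index table name
  // only set the allocator class when one is supplied; otherwise the default
  // (DefaultHBaseQPSResourceAllocator) applies, which is what the test asserts
  resourceAllocatorClass.ifPresent(hbaseIndexConfig::withQPSResourceAllocatorType);
  return HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withIndexConfig(HoodieIndexConfig.newBuilder()
          .withIndexType(HoodieIndex.IndexType.HBASE)
          .withHBaseIndexConfig(hbaseIndexConfig.build())
          .build())
      .build();
}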
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestSparkHoodieHBaseIndex, method testSimpleTagLocationWithInvalidCommit.
/*
 * Test case to verify that, for tagLocation entries present in HBase, if the corresponding commit instant is
 * missing from the timeline and the commit is not archived, tagLocation resets the current record location to null.
 */
@Test
public void testSimpleTagLocationWithInvalidCommit() throws Exception {
  // Load to memory
  HoodieWriteConfig config = getConfigBuilder(100, false, false).withRollbackUsingMarkers(false).build();
  SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
  SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
  String newCommitTime = writeClient.startCommit();
  // make a commit with 199 records
  JavaRDD<HoodieRecord> writeRecords = generateAndCommitRecords(writeClient, 199, newCommitTime);
  // make a second commit with a single record
  String invalidCommit = writeClient.startCommit();
  JavaRDD<HoodieRecord> invalidWriteRecords = generateAndCommitRecords(writeClient, 1, invalidCommit);
  // verify the single record is present and tagged with the second commit's time
  HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
  JavaRDD<HoodieRecord> javaRDD0 = tagLocation(index, invalidWriteRecords, hoodieTable);
  assertEquals(1, javaRDD0.collect().size());
  assertEquals(1, javaRDD0.filter(HoodieRecord::isCurrentLocationKnown).collect().size());
  assertEquals(invalidCommit, javaRDD0.collect().get(0).getCurrentLocation().getInstantTime());
  // rollback the second commit, so that HBase is left with a stale index entry
  writeClient.rollback(invalidCommit);
  // tagLocation for the valid records: the index should tag all of them
  metaClient = HoodieTableMetaClient.reload(metaClient);
  hoodieTable = HoodieSparkTable.create(config, context, metaClient);
  JavaRDD<HoodieRecord> javaRDD1 = tagLocation(index, writeRecords, hoodieTable);
  assertEquals(199, javaRDD1.filter(HoodieRecord::isCurrentLocationKnown).collect().size());
  // tagLocation for the invalid record: its commit is gone from the timeline due to the
  // rollback, so the record is present but must come back untagged
  JavaRDD<HoodieRecord> javaRDD2 = tagLocation(index, invalidWriteRecords, hoodieTable);
  assertEquals(1, javaRDD2.collect().size());
  assertEquals(0, javaRDD2.filter(HoodieRecord::isCurrentLocationKnown).collect().size());
}
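The generateAndCommitRecords(...) helper used above is also not shown in this snippet. A plausible sketch, assuming the test base class provides dataGen, jsc() and assertNoWriteErrors(...) as in Hudi's Spark client test harness:

// Plausible sketch of the generateAndCommitRecords(...) helper; treat it as an assumption.
private JavaRDD<HoodieRecord> generateAndCommitRecords(SparkRDDWriteClient writeClient,
    int numRecords, String commitTime) throws Exception {
  // generate inserts for the given commit time and distribute them over one partition
  List<HoodieRecord> records = dataGen.generateInserts(commitTime, numRecords);
  JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
  // upsert and commit; a completed commit is what makes the HBase index entries visible
  JavaRDD<WriteStatus> writeStatuses = writeClient.upsert(writeRecords, commitTime);
  assertNoWriteErrors(writeStatuses.collect());
  writeClient.commit(commitTime, writeStatuses);
  return writeRecords;
}

The two-argument overload used in the later tests presumably just starts a fresh commit with writeClient.startCommit() and delegates to this one.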
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestSparkHoodieHBaseIndex, method testEnsureTagLocationUsesCommitTimeline.
/*
 * Test case to verify that tagLocation() uses the commit timeline to validate the commit timestamps stored in
 * HBase. If checkIfValidCommit() in the HBase index used incorrect timeline filtering, this test would fail.
 */
@Test
public void testEnsureTagLocationUsesCommitTimeline() throws Exception {
  // Load to memory
  HoodieWriteConfig config = getConfigBuilder(100, false, false).withRollbackUsingMarkers(false).build();
  SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
  SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
  String commitTime1 = writeClient.startCommit();
  JavaRDD<HoodieRecord> writeRecords1 = generateAndCommitRecords(writeClient, 20, commitTime1);
  // rollback the commit - this leaves a clean file in the timeline
  writeClient.rollback(commitTime1);
  // create a second commit with 20 records
  metaClient = HoodieTableMetaClient.reload(metaClient);
  generateAndCommitRecords(writeClient, 20);
  // tagLocation for the first, rolled-back set of records: the HBase index should still tag them
  metaClient = HoodieTableMetaClient.reload(metaClient);
  HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
  JavaRDD<HoodieRecord> javaRDD1 = tagLocation(index, writeRecords1, hoodieTable);
  assertEquals(20, javaRDD1.filter(HoodieRecord::isCurrentLocationKnown).collect().size());
}
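The timeline filtering that the comment block refers to amounts to validating the commit timestamp read from HBase against the completed commits timeline only. A simplified sketch of that check, modeled on Hudi's checkIfValidCommit (the exact class and signature vary across versions, so treat this as an assumption):

// Simplified sketch: a commit timestamp stored in HBase is accepted when the filtered
// commits timeline contains it, or when it sorts before the start of the active
// timeline (the archived case).
private static boolean checkIfValidCommit(HoodieTableMetaClient metaClient, String commitTs) {
  // only completed commits count; rollback, clean and other instants are filtered out
  HoodieTimeline commitTimeline = metaClient.getCommitsTimeline().filterCompletedInstants();
  return !commitTimeline.empty() && commitTimeline.containsOrBeforeTimelineStarts(commitTs);
}

Under such a check, the rolled-back commitTime1 above still tags as valid: after the second commit, commitTime1 sorts before the start of the active commits timeline, which is what the assertion on all 20 records expects.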
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestSparkHoodieHBaseIndex, method testSimpleTagLocationAndUpdate.
@ParameterizedTest
@EnumSource(HoodieTableType.class)
public void testSimpleTagLocationAndUpdate(HoodieTableType tableType) throws Exception {
  metaClient = HoodieTestUtils.init(hadoopConf, basePath, tableType);
  final String newCommitTime = "001";
  final int numRecords = 10;
  List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, numRecords);
  JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
  // Load to memory
  HoodieWriteConfig config = getConfig();
  SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
  try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    // Test tagLocation without any entries in the index
    JavaRDD<HoodieRecord> records1 = tagLocation(index, writeRecords, hoodieTable);
    assertEquals(0, records1.filter(record -> record.isCurrentLocationKnown()).count());
    // Upsert the 10 records, but do not commit yet
    writeClient.startCommitWithTime(newCommitTime);
    JavaRDD<WriteStatus> writeStatuses = writeClient.upsert(writeRecords, newCommitTime);
    assertNoWriteErrors(writeStatuses.collect());
    // tagLocation for these records: the index should not tag them since the commit never occurred
    JavaRDD<HoodieRecord> records2 = tagLocation(index, writeRecords, hoodieTable);
    assertEquals(0, records2.filter(record -> record.isCurrentLocationKnown()).count());
    // Now commit this and update the location of the inserted records, validating no errors
    writeClient.commit(newCommitTime, writeStatuses);
    // tagLocation again: the index should now tag all records correctly
    metaClient = HoodieTableMetaClient.reload(metaClient);
    hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    List<HoodieRecord> records3 = tagLocation(index, writeRecords, hoodieTable).collect();
    assertEquals(numRecords, records3.stream().filter(record -> record.isCurrentLocationKnown()).count());
    assertEquals(numRecords, records3.stream().map(record -> record.getKey().getRecordKey()).distinct().count());
    assertEquals(numRecords, records3.stream()
        .filter(record -> (record.getCurrentLocation() != null
            && record.getCurrentLocation().getInstantTime().equals(newCommitTime)))
        .distinct().count());
  }
}
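The tagLocation(index, records, table) calls in all of these tests go through a small test helper rather than calling the index directly. A plausible sketch, assuming the HoodieJavaRDD adapter and the HoodieData-based HoodieIndex.tagLocation signature of recent Hudi versions:

// Plausible sketch of the tagLocation(...) test helper; the adapter calls are assumptions.
private JavaRDD<HoodieRecord> tagLocation(SparkHoodieHBaseIndex index,
    JavaRDD<HoodieRecord> records, HoodieTable hoodieTable) {
  // wrap the plain JavaRDD as HoodieData, tag against the index, and unwrap the result
  return HoodieJavaRDD.getJavaRDD(
      index.tagLocation(HoodieJavaRDD.of(records), context, hoodieTable));
}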
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestSparkHoodieHBaseIndex, method testHbaseTagLocationForArchivedCommits.
// Verify that the HBase index tags records belonging to an archived commit as valid.
@Test
public void testHbaseTagLocationForArchivedCommits() throws Exception {
  // configure aggressive cleaning/archival so the first commit is archived quickly
  Map<String, String> params = new HashMap<>();
  params.put(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), "1");
  params.put(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), "3");
  params.put(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), "2");
  HoodieWriteConfig config = getConfigBuilder(100, false, false).withProps(params).build();
  SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
  SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
  // make the first commit with 20 records
  JavaRDD<HoodieRecord> writeRecords1 = generateAndCommitRecords(writeClient, 20);
  // make 3 additional commits, so that the first commit is archived
  for (int nCommit = 0; nCommit < 3; nCommit++) {
    generateAndCommitRecords(writeClient, 20);
  }
  // tagLocation for the first set of records (from the archived commit): the index should tag them as valid
  metaClient = HoodieTableMetaClient.reload(metaClient);
  HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
  JavaRDD<HoodieRecord> javaRDD1 = tagLocation(index, writeRecords1, hoodieTable);
  assertEquals(20, javaRDD1.filter(HoodieRecord::isCurrentLocationKnown).collect().size());
}
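The archival arithmetic: with MIN_COMMITS_TO_KEEP=2 and MAX_COMMITS_TO_KEEP=3, the fourth commit pushes the active timeline past the maximum, so it is archived down to two instants and the first commit drops out of the active timeline. Once archived, its timestamp is accepted through the before-timeline-starts branch of the validity check sketched earlier. The same settings are more commonly expressed through the typed compaction config builder; a sketch, assuming the retainCommits and archiveCommitsWith builder methods of this Hudi version:

// Equivalent archival settings via the typed builder instead of raw properties.
HoodieWriteConfig config = getConfigBuilder(100, false, false)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .retainCommits(1)          // CLEANER_COMMITS_RETAINED = 1
        .archiveCommitsWith(2, 3)  // MIN_COMMITS_TO_KEEP = 2, MAX_COMMITS_TO_KEEP = 3
        .build())
    .build();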