Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class ITTestClusteringCommand, method generateCommits.
private void generateCommits() throws IOException {
  HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
  // Create the write client to write some records in
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2).withDeleteParallelism(2)
      .forTable(tableName)
      .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build();
  SparkRDDWriteClient<HoodieAvroPayload> client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jsc), cfg);
  insert(jsc, client, dataGen, "001");
  insert(jsc, client, dataGen, "002");
}
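The insert helper called above is not part of this snippet. Below is a minimal sketch of what it might do, assuming it simply generates a batch of records with the HoodieTestDataGenerator and writes them under the given commit time; the body is an assumption for illustration, not the actual helper.

private void insert(JavaSparkContext jsc, SparkRDDWriteClient<HoodieAvroPayload> client,
    HoodieTestDataGenerator dataGen, String commitTime) throws IOException {
  // Assumed body: generate inserts and write them under the supplied instant time
  List<HoodieRecord> records = dataGen.generateInserts(commitTime, 100);
  client.startCommitWithTime(commitTime);
  List<WriteStatus> writeStatuses = client.insert(jsc.parallelize(records, 1), commitTime).collect();
  // a test would typically assert that writeStatuses contains no write errors
}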
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class SparkMain, method createSavepoint.
private static int createSavepoint(JavaSparkContext jsc, String commitTime, String user, String comments, String basePath) throws Exception {
  SparkRDDWriteClient client = createHoodieClient(jsc, basePath);
  try {
    client.savepoint(commitTime, user, comments);
    LOG.info(String.format("The commit \"%s\" has been savepointed.", commitTime));
    return 0;
  } catch (HoodieSavepointException se) {
    LOG.warn(String.format("Failed: Could not create savepoint \"%s\".", commitTime), se);
    return -1;
  }
}
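The createHoodieClient helper is not shown in this snippet. Here is a hedged sketch of how such a helper could build a write client from just the base path, reusing only builder calls that appear elsewhere on this page; the configuration actually used by SparkMain may differ.

private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) {
  // Sketch only: point the write config at the table location, pick a Bloom index,
  // then wrap the Spark context in a Hudi engine context
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build();
  return new SparkRDDWriteClient(new HoodieSparkEngineContext(jsc), config);
}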
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class SparkMain, method deleteSavepoint.
private static int deleteSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) throws Exception {
  SparkRDDWriteClient client = createHoodieClient(jsc, basePath);
  try {
    client.deleteSavepoint(savepointTime);
    LOG.info(String.format("Savepoint \"%s\" deleted.", savepointTime));
    return 0;
  } catch (Exception e) {
    LOG.warn(String.format("Failed: Could not delete savepoint \"%s\".", savepointTime), e);
    return -1;
  }
}
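Together with createSavepoint above, this covers both ends of the savepoint lifecycle. A short usage sketch against the write client follows; the restoreToSavepoint call is an assumption about the client API and is not shown in the snippets on this page.

SparkRDDWriteClient client = createHoodieClient(jsc, basePath); // helper as sketched above
client.savepoint("20220101120000", "admin", "before risky backfill"); // pin this commit
// ... run the risky job; if it misbehaves, roll the table back to the pinned state (assumed API)
client.restoreToSavepoint("20220101120000");
// once the savepoint is no longer needed, release it so cleaning can reclaim the files
client.deleteSavepoint("20220101120000");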
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class TestHoodieBackedMetadata, method testReader.
/**
 * Ensure that the reader only reads completed instants.
 *
 * @throws Exception on test failure
 */
@Test
public void testReader() throws Exception {
  init(HoodieTableType.COPY_ON_WRITE);
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
  List<HoodieRecord> records;
  List<WriteStatus> writeStatuses;
  String[] commitTimestamps = {
      HoodieActiveTimeline.createNewInstantTime(), HoodieActiveTimeline.createNewInstantTime(),
      HoodieActiveTimeline.createNewInstantTime(), HoodieActiveTimeline.createNewInstantTime()
  };
  try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, getWriteConfig(true, true))) {
    for (int i = 0; i < commitTimestamps.length; ++i) {
      records = dataGen.generateInserts(commitTimestamps[i], 5);
      client.startCommitWithTime(commitTimestamps[i]);
      writeStatuses = client.bulkInsert(jsc.parallelize(records, 1), commitTimestamps[i]).collect();
      assertNoWriteErrors(writeStatuses);
    }
    // Ensure we can see files from each commit
    Set<String> timelineTimestamps = getAllFiles(metadata(client)).stream()
        .map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
    assertEquals(timelineTimestamps.size(), commitTimestamps.length);
    for (int i = 0; i < commitTimestamps.length; ++i) {
      assertTrue(timelineTimestamps.contains(commitTimestamps[i]));
    }
    // mark each commit as incomplete and ensure files are not seen
    for (int i = 0; i < commitTimestamps.length; ++i) {
      FileCreateUtils.deleteCommit(basePath, commitTimestamps[i]);
      timelineTimestamps = getAllFiles(metadata(client)).stream()
          .map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
      assertEquals(timelineTimestamps.size(), commitTimestamps.length - 1);
      for (int j = 0; j < commitTimestamps.length; ++j) {
        assertTrue(j == i || timelineTimestamps.contains(commitTimestamps[j]));
      }
      FileCreateUtils.createCommit(basePath, commitTimestamps[i]);
    }
    // Test multiple incomplete commits
    FileCreateUtils.deleteCommit(basePath, commitTimestamps[0]);
    FileCreateUtils.deleteCommit(basePath, commitTimestamps[2]);
    timelineTimestamps = getAllFiles(metadata(client)).stream()
        .map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
    assertEquals(timelineTimestamps.size(), commitTimestamps.length - 2);
    for (int j = 0; j < commitTimestamps.length; ++j) {
      assertTrue(j == 0 || j == 2 || timelineTimestamps.contains(commitTimestamps[j]));
    }
    // Test no completed commits
    for (int i = 0; i < commitTimestamps.length; ++i) {
      FileCreateUtils.deleteCommit(basePath, commitTimestamps[i]);
    }
    timelineTimestamps = getAllFiles(metadata(client)).stream()
        .map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
    assertEquals(timelineTimestamps.size(), 0);
  }
}
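The getAllFiles and metadata helpers used in the assertions are not shown above. A rough sketch of what getAllFiles might look like follows, assuming metadata(client) returns a HoodieTableMetadata view of the table; the helper below is an assumption for illustration.

private List<Path> getAllFiles(HoodieTableMetadata metadata) throws Exception {
  // Walk every partition reported by the metadata table and collect the paths of all its files
  List<Path> allFiles = new ArrayList<>();
  for (String partition : metadata.getAllPartitionPaths()) {
    for (FileStatus status : metadata.getAllFilesInPartition(new Path(basePath, partition))) {
      allFiles.add(status.getPath());
    }
  }
  return allFiles;
}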
Use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.
The class TestHoodieBackedMetadata, method testMetadataMetrics.
/**
 * Test the various metrics published by the metadata table.
 */
@Test
public void testMetadataMetrics() throws Exception {
  init(HoodieTableType.COPY_ON_WRITE, false);
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
  try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, getWriteConfigBuilder(true, true, true).build())) {
    // Write
    String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 20);
    client.startCommitWithTime(newCommitTime);
    List<WriteStatus> writeStatuses = client.insert(jsc.parallelize(records, 1), newCommitTime).collect();
    assertNoWriteErrors(writeStatuses);
    validateMetadata(client);
    Registry metricsRegistry = Registry.getRegistry("HoodieMetadata");
    assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".count"));
    assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".totalDuration"));
    assertTrue(metricsRegistry.getAllCounts().get(HoodieMetadataMetrics.INITIALIZE_STR + ".count") >= 1L);
    final String prefix = MetadataPartitionType.FILES.getPartitionPath() + ".";
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_BASE_FILES));
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_LOG_FILES));
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_TOTAL_BASE_FILE_SIZE));
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_TOTAL_LOG_FILE_SIZE));
  }
}
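getWriteConfigBuilder(true, true, true) is a test helper whose third flag presumably turns on metrics for the metadata table, given the counters asserted above. A hedged sketch of the kind of configuration it could produce follows; the enableMetrics builder call is an assumption about the metadata config API and may differ between Hudi versions.

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
    .withMetadataConfig(HoodieMetadataConfig.newBuilder()
        .enable(true)        // maintain the metadata table alongside the data table
        .enableMetrics(true) // assumed builder call for the third flag; publishes the HoodieMetadata counters
        .build())
    .forTable(tableName)
    .build();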