Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
Class MetadataCommand, method create.
@CliCommand(value = "metadata create", help = "Create the Metadata Table if it does not exist")
public String create(@CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master) throws IOException {
  HoodieCLI.getTableMetaClient();
  Path metadataPath = new Path(getMetadataTableBasePath(HoodieCLI.basePath));
  try {
    FileStatus[] statuses = HoodieCLI.fs.listStatus(metadataPath);
    if (statuses.length > 0) {
      throw new RuntimeException("Metadata directory (" + metadataPath.toString() + ") not empty.");
    }
  } catch (FileNotFoundException e) {
    // Metadata directory does not exist yet
    HoodieCLI.fs.mkdirs(metadataPath);
  }
  HoodieTimer timer = new HoodieTimer().startTimer();
  HoodieWriteConfig writeConfig = getWriteConfig();
  initJavaSparkContext(Option.of(master));
  SparkHoodieBackedTableMetadataWriter.create(HoodieCLI.conf, writeConfig, new HoodieSparkEngineContext(jsc));
  return String.format("Created Metadata Table in %s (duration=%.2f secs)", metadataPath, timer.endTimer() / 1000.0);
}
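Outside the CLI, the same bootstrap reduces to wrapping a JavaSparkContext in a HoodieSparkEngineContext and handing it to the metadata writer. A minimal sketch, assuming a local master and an already-built HoodieWriteConfig (both are placeholders, not values taken from the command above):

SparkConf sparkConf = new SparkConf().setAppName("hudi-metadata-bootstrap").setMaster("local[2]");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
// writeConfig is assumed to point at the table's base path, as getWriteConfig() does in the command above
SparkHoodieBackedTableMetadataWriter.create(jsc.hadoopConfiguration(), writeConfig, engineContext);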
Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
Class MetadataCommand, method listPartitions.
@CliCommand(value = "metadata list-partitions", help = "List all partitions from metadata")
public String listPartitions(@CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master) throws IOException {
  HoodieCLI.getTableMetaClient();
  initJavaSparkContext(Option.of(master));
  HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build();
  HoodieBackedTableMetadata metadata = new HoodieBackedTableMetadata(new HoodieSparkEngineContext(jsc), config, HoodieCLI.basePath, "/tmp");
  if (!metadata.enabled()) {
    return "[ERROR] Metadata Table not enabled/initialized\n\n";
  }
  HoodieTimer timer = new HoodieTimer().startTimer();
  List<String> partitions = metadata.getAllPartitionPaths();
  LOG.debug("Took " + timer.endTimer() + " ms");
  final List<Comparable[]> rows = new ArrayList<>();
  partitions.stream().sorted(Comparator.reverseOrder()).forEach(p -> {
    Comparable[] row = new Comparable[1];
    row[0] = p;
    rows.add(row);
  });
  TableHeader header = new TableHeader().addTableHeaderField("partition");
  return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
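The same listing is available programmatically; the sketch below mirrors the command above (enabled metadata config, "/tmp" spillable-map directory), with jsc assumed to be an existing JavaSparkContext and basePath the table's base path.

HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build();
HoodieBackedTableMetadata metadata = new HoodieBackedTableMetadata(new HoodieSparkEngineContext(jsc), config, basePath, "/tmp");
// Served from the metadata table rather than a recursive file listing; throws IOException on failure
List<String> partitions = metadata.getAllPartitionPaths();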
Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
Class TestDatePartitionPathSelector, method setup.
@BeforeEach
public void setup() {
  initSparkContexts();
  initPath();
  initFileSystem();
  context = new HoodieSparkEngineContext(jsc);
}
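initSparkContexts(), initPath(), and initFileSystem() come from Hudi's shared test harness; conceptually the engine context setup amounts to wrapping a local JavaSparkContext, as in this hedged sketch (app name and master URL are assumptions):

SparkConf sparkConf = new SparkConf().setAppName("TestDatePartitionPathSelector").setMaster("local[2]");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
HoodieSparkEngineContext context = new HoodieSparkEngineContext(jsc);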
Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
Class TestHoodieBackedMetadata, method testReader.
/**
 * Ensure that the reader only reads completed instants.
 *
 * @throws Exception
 */
@Test
public void testReader() throws Exception {
  init(HoodieTableType.COPY_ON_WRITE);
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
  List<HoodieRecord> records;
  List<WriteStatus> writeStatuses;
  String[] commitTimestamps = { HoodieActiveTimeline.createNewInstantTime(), HoodieActiveTimeline.createNewInstantTime(),
      HoodieActiveTimeline.createNewInstantTime(), HoodieActiveTimeline.createNewInstantTime() };
  try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, getWriteConfig(true, true))) {
    for (int i = 0; i < commitTimestamps.length; ++i) {
      records = dataGen.generateInserts(commitTimestamps[i], 5);
      client.startCommitWithTime(commitTimestamps[i]);
      writeStatuses = client.bulkInsert(jsc.parallelize(records, 1), commitTimestamps[i]).collect();
      assertNoWriteErrors(writeStatuses);
    }
    // Ensure we can see files from each commit
    Set<String> timelineTimestamps = getAllFiles(metadata(client)).stream().map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
    assertEquals(timelineTimestamps.size(), commitTimestamps.length);
    for (int i = 0; i < commitTimestamps.length; ++i) {
      assertTrue(timelineTimestamps.contains(commitTimestamps[i]));
    }
    // Mark each commit as incomplete and ensure its files are not seen
    for (int i = 0; i < commitTimestamps.length; ++i) {
      FileCreateUtils.deleteCommit(basePath, commitTimestamps[i]);
      timelineTimestamps = getAllFiles(metadata(client)).stream().map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
      assertEquals(timelineTimestamps.size(), commitTimestamps.length - 1);
      for (int j = 0; j < commitTimestamps.length; ++j) {
        assertTrue(j == i || timelineTimestamps.contains(commitTimestamps[j]));
      }
      FileCreateUtils.createCommit(basePath, commitTimestamps[i]);
    }
    // Test multiple incomplete commits
    FileCreateUtils.deleteCommit(basePath, commitTimestamps[0]);
    FileCreateUtils.deleteCommit(basePath, commitTimestamps[2]);
    timelineTimestamps = getAllFiles(metadata(client)).stream().map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
    assertEquals(timelineTimestamps.size(), commitTimestamps.length - 2);
    for (int j = 0; j < commitTimestamps.length; ++j) {
      assertTrue(j == 0 || j == 2 || timelineTimestamps.contains(commitTimestamps[j]));
    }
    // Test no completed commits
    for (int i = 0; i < commitTimestamps.length; ++i) {
      FileCreateUtils.deleteCommit(basePath, commitTimestamps[i]);
    }
    timelineTimestamps = getAllFiles(metadata(client)).stream().map(p -> p.getName()).map(n -> FSUtils.getCommitTime(n)).collect(Collectors.toSet());
    assertEquals(timelineTimestamps.size(), 0);
  }
}
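The listings asserted above go through the metadata reader. A hedged sketch of the same read path, assuming the HoodieTableMetadata API of this Hudi version and a hypothetical partition 2020/01/01:

HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build();
HoodieBackedTableMetadata reader = new HoodieBackedTableMetadata(engineContext, metadataConfig, basePath, "/tmp");
// Only files written by completed instants should be returned
FileStatus[] files = reader.getAllFilesInPartition(new Path(basePath, "2020/01/01"));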
Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
Class TestHoodieBackedMetadata, method testMetadataMetrics.
/**
 * Test various metrics published by the metadata table.
 */
@Test
public void testMetadataMetrics() throws Exception {
  init(HoodieTableType.COPY_ON_WRITE, false);
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
  try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, getWriteConfigBuilder(true, true, true).build())) {
    // Write
    String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 20);
    client.startCommitWithTime(newCommitTime);
    List<WriteStatus> writeStatuses = client.insert(jsc.parallelize(records, 1), newCommitTime).collect();
    assertNoWriteErrors(writeStatuses);
    validateMetadata(client);
    Registry metricsRegistry = Registry.getRegistry("HoodieMetadata");
    assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".count"));
    assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".totalDuration"));
    assertTrue(metricsRegistry.getAllCounts().get(HoodieMetadataMetrics.INITIALIZE_STR + ".count") >= 1L);
    final String prefix = MetadataPartitionType.FILES.getPartitionPath() + ".";
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_BASE_FILES));
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_LOG_FILES));
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_TOTAL_BASE_FILE_SIZE));
    assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_TOTAL_LOG_FILE_SIZE));
  }
}
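Individual counters can be read back the same way the assertions build their keys; a short sketch reusing the registry name and metric constants from the test (variable names are illustrative):

Registry metricsRegistry = Registry.getRegistry("HoodieMetadata");
String key = MetadataPartitionType.FILES.getPartitionPath() + "." + HoodieMetadataMetrics.STAT_COUNT_BASE_FILES;
// getAllCounts() exposes the published counters as a map of metric name to count
Long baseFileCount = metricsRegistry.getAllCounts().get(key);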