use of org.apache.hudi.common.model.FileSlice in project hudi by apache.
the class TestHoodieCompactionStrategy method createCompactionOperations.
/**
 * Builds one compaction operation per entry of {@code sizesMap}.
 *
 * <p>For every entry, the key is used to create a test base file and to look up the partition path in
 * {@code keyToPartitionMap}; every element of the value list is turned into a test log file. A file slice
 * holding the base file and the log files is assembled so that the configured compaction strategy can
 * capture its metrics for the resulting operation.
 *
 * @param config            write config supplying the compaction strategy
 * @param sizesMap          key passed to {@code TestHoodieBaseFile.newDataFile}, mapped to the values passed
 *                          to {@code TestHoodieLogFile.newLogFile}
 * @param keyToPartitionMap partition path for each key of {@code sizesMap}
 * @return the compaction operations, one per entry of {@code sizesMap}
 */
private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config, Map<Long, List<Long>> sizesMap, Map<Long, String> keyToPartitionMap) {
  List<HoodieCompactionOperation> operations = new ArrayList<>(sizesMap.size());
  for (Map.Entry<Long, List<Long>> entry : sizesMap.entrySet()) {
    Long key = entry.getKey();
    HoodieBaseFile baseFile = TestHoodieBaseFile.newDataFile(key);
    String partitionPath = keyToPartitionMap.get(key);

    List<HoodieLogFile> logFiles = entry.getValue().stream()
        .map(TestHoodieLogFile::newLogFile)
        .collect(Collectors.toList());

    // Assemble the file slice the compaction strategy inspects when capturing metrics.
    FileSlice slice = new FileSlice(new HoodieFileGroupId(partitionPath, baseFile.getFileId()), baseFile.getCommitTime());
    slice.setBaseFile(baseFile);
    logFiles.forEach(slice::addLogFile);

    List<String> logFilePaths = logFiles.stream()
        .map(logFile -> logFile.getPath().toString())
        .collect(Collectors.toList());

    operations.add(new HoodieCompactionOperation(
        baseFile.getCommitTime(),
        logFilePaths,
        baseFile.getPath(),
        baseFile.getFileId(),
        partitionPath,
        config.getCompactionStrategy().captureMetrics(config, slice),
        // Bootstrap base file path is optional; null when the base file has none.
        baseFile.getBootstrapBaseFile().map(BaseFile::getPath).orElse(null)));
  }
  return operations;
}
use of org.apache.hudi.common.model.FileSlice in project hudi by apache.
the class TestCopyOnWriteRollbackActionExecutor method testCopyOnWriteRollbackWithReplaceCommits.
/**
 * Verify that rollback works with replacecommit.
 *
 * @param isUsingMarkers whether the write config enables rollback using markers
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testCopyOnWriteRollbackWithReplaceCommits(boolean isUsingMarkers) throws IOException {
  HoodieWriteConfig cfg = getConfigBuilder()
      .withRollbackUsingMarkers(isUsingMarkers)
      .withAutoCommit(false)
      .build();

  // 1. prepare data: the helper receives both lists, one per partition, to record the second commit's file slices
  List<FileSlice> firstPartitionSlices = new ArrayList<>();
  List<FileSlice> secondPartitionSlices = new ArrayList<>();
  this.insertOverwriteCommitDataWithTwoPartitions(firstPartitionSlices, secondPartitionSlices, cfg, !isUsingMarkers);

  // 2. roll back and assert the result
  HoodieTable table = this.getHoodieTable(metaClient, cfg);
  performRollbackAndValidate(isUsingMarkers, cfg, table, firstPartitionSlices, secondPartitionSlices);
}
use of org.apache.hudi.common.model.FileSlice in project hudi by apache.
the class TimelineServerPerf method runLookups.
/**
 * Runs concurrent file-system-view lookups for each partition and collects the resulting stats.
 *
 * <p>For every partition path, a file id is picked at random from the partition's latest file slices
 * ({@code "dummyId"} when the partition has none) and {@code concurrency} calls to {@code runOneRound} are
 * scheduled on a dedicated thread pool. The pool is always shut down once the partition has been processed,
 * so worker threads are not leaked across partitions.
 *
 * @param jsc            Spark context used to build the engine context
 * @param partitionPaths partitions to run lookups against
 * @param fsView         file-system view being measured
 * @param numIterations  iteration count forwarded to each {@code runOneRound} call
 * @param concurrency    number of {@code runOneRound} tasks scheduled per partition
 * @return the stats of every scheduled round, across all partitions
 * @throws RuntimeException wrapping the cause if a round fails or the waiting thread is interrupted
 */
public List<PerfStats> runLookups(JavaSparkContext jsc, List<String> partitionPaths, SyncableFileSystemView fsView, int numIterations, int concurrency) {
  HoodieEngineContext context = new HoodieSparkEngineContext(jsc);
  context.setJobStatus(this.getClass().getSimpleName(), "Lookup all performance stats");
  return context.flatMap(partitionPaths, p -> {
    ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(100);
    try {
      final List<PerfStats> result = new ArrayList<>();
      final List<ScheduledFuture<PerfStats>> futures = new ArrayList<>();
      List<FileSlice> slices = fsView.getLatestFileSlices(p).collect(Collectors.toList());
      String fileId = slices.isEmpty() ? "dummyId" : slices.get(new Random(Double.doubleToLongBits(Math.random())).nextInt(slices.size())).getFileId();
      // Zero delay: all rounds are submitted for immediate, concurrent execution.
      IntStream.range(0, concurrency).forEach(i -> futures.add(executor.schedule(() -> runOneRound(fsView, p, fileId, i, numIterations), 0, TimeUnit.NANOSECONDS)));
      futures.forEach(x -> {
        try {
          result.add(x.get());
        } catch (InterruptedException e) {
          // Restore the interrupt flag so callers up the stack can still observe the interruption.
          Thread.currentThread().interrupt();
          throw new RuntimeException(e);
        } catch (ExecutionException e) {
          throw new RuntimeException(e);
        }
      });
      System.out.println("SLICES are=");
      slices.forEach(s -> System.out.println("\t\tFileSlice=" + s));
      return result.stream();
    } finally {
      // On success every task has already completed; on failure this cancels the rounds still pending.
      // Either way the pool's worker threads are released instead of lingering after the lambda returns.
      executor.shutdownNow();
    }
  }, cfg.numExecutors);
}
use of org.apache.hudi.common.model.FileSlice in project hudi by apache.
the class TestFileSystemViewCommand method testShowLatestFileSlices.
/**
 * Test case for command 'show fsview latest'.
 *
 * <p>Executes the command for {@code partitionPath}, rebuilds the expected table from the latest file
 * slices reported by {@code fsView}, and compares the two outputs after both have been passed through
 * {@code removeNonWordAndStripSpace}.
 */
@Test
public void testShowLatestFileSlices() {
  // Test show with partition path '2016/03/15'
  CommandResult cr = shell().executeCommand("show fsview latest --partitionPath " + partitionPath);
  assertTrue(cr.isSuccess());

  // Build one expected row (13 columns) per latest file slice.
  List<Comparable[]> rows = new ArrayList<>();
  Stream<FileSlice> latestSlices = fsView.getLatestFileSlices(partitionPath);
  latestSlices.forEach(slice -> {
    boolean hasBaseFile = slice.getBaseFile().isPresent();
    // -1 marks a slice without a base file.
    long dataFileSize = hasBaseFile ? slice.getBaseFile().get().getFileSize() : -1;

    // Log files whose base commit time equals the slice's base instant count as "scheduled";
    // all remaining log files count as "unscheduled".
    long scheduledSize = slice.getLogFiles()
        .filter(lf -> lf.getBaseCommitTime().equals(slice.getBaseInstantTime()))
        .mapToLong(HoodieLogFile::getFileSize)
        .sum();
    long unscheduledSize = slice.getLogFiles()
        .filter(lf -> !lf.getBaseCommitTime().equals(slice.getBaseInstantTime()))
        .mapToLong(HoodieLogFile::getFileSize)
        .sum();

    // Ratios are only defined for a positive base file size; otherwise they stay at -1.
    double scheduledToBaseRatio = -1;
    double unscheduledToBaseRatio = -1;
    if (dataFileSize > 0) {
      scheduledToBaseRatio = scheduledSize / (dataFileSize * 1.0);
      unscheduledToBaseRatio = unscheduledSize / (dataFileSize * 1.0);
    }

    String scheduledLogFiles = slice.getLogFiles()
        .filter(lf -> lf.getBaseCommitTime().equals(slice.getBaseInstantTime()))
        .collect(Collectors.toList())
        .toString();
    String unscheduledLogFiles = slice.getLogFiles()
        .filter(lf -> !lf.getBaseCommitTime().equals(slice.getBaseInstantTime()))
        .collect(Collectors.toList())
        .toString();

    // Column order must match the header assembled below.
    rows.add(new Comparable[] {
        partitionPath,
        slice.getFileId(),
        slice.getBaseInstantTime(),
        hasBaseFile ? slice.getBaseFile().get().getPath() : "",
        dataFileSize,
        slice.getLogFiles().count(),
        slice.getLogFiles().mapToLong(HoodieLogFile::getFileSize).sum(),
        scheduledSize,
        unscheduledSize,
        scheduledToBaseRatio,
        unscheduledToBaseRatio,
        scheduledLogFiles,
        unscheduledLogFiles
    });
  });

  // Size columns are rendered as human-readable byte counts.
  Function<Object, String> converterFunction = entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString())));
  Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
  fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE, converterFunction);
  fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_SIZE, converterFunction);
  fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DELTA_SIZE_SCHEDULED, converterFunction);
  fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DELTA_SIZE_UNSCHEDULED, converterFunction);

  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_SIZE)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_SIZE_SCHEDULED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_SIZE_UNSCHEDULED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_BASE_SCHEDULED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_BASE_UNSCHEDULED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_SCHEDULED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_UNSCHEDULED);

  String expected = removeNonWordAndStripSpace(HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, rows));
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expected, got);
}
use of org.apache.hudi.common.model.FileSlice in project hudi by apache.
the class CompactionAdminClient method validateCompactionOperation.
/**
 * Check if a compaction operation is valid.
 *
 * <p>The operation is checked against the latest un-compacted file slice of its file id: the base file
 * named by the operation (if any) must match the slice's base file, every log file named by the operation
 * must be part of the slice, and any additional log file in the slice must have {@code compactionInstant}
 * as its base commit time. Validation failures are reported through the returned result instead of being
 * thrown.
 *
 * @param metaClient Hoodie Table Meta client
 * @param compactionInstant Compaction Instant
 * @param operation Compaction Operation
 * @param fsViewOpt File System View; when absent, a view over the commits-and-compaction timeline is created
 * @return a result flagged successful when all checks pass; otherwise a failed result carrying the
 *         {@code CompactionValidationException} or {@code IllegalArgumentException} that was raised
 * @throws IOException if looking up the status of the operation's data file fails
 */
private ValidationOpResult validateCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt) throws IOException {
  HoodieTableFileSystemView fileSystemView;
  if (fsViewOpt.isPresent()) {
    fileSystemView = fsViewOpt.get();
  } else {
    fileSystemView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  }
  Option<HoodieInstant> lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant();

  try {
    if (!lastInstant.isPresent()) {
      throw new CompactionValidationException("Unable to find any committed instant. Compaction Operation may be pointing to stale file-slices");
    }

    Option<FileSlice> sliceOpt = Option.fromJavaOptional(
        fileSystemView.getLatestUnCompactedFileSlices(operation.getPartitionPath())
            .filter(candidate -> candidate.getFileId().equals(operation.getFileId()))
            .findFirst());
    if (!sliceOpt.isPresent()) {
      throw new CompactionValidationException("Unable to find file-slice for file-id (" + operation.getFileId() + " Compaction operation is invalid.");
    }

    FileSlice slice = sliceOpt.get();
    Option<HoodieBaseFile> baseFile = slice.getBaseFile();

    // Base file check: the path resolved from the operation must equal the slice's base file path.
    if (operation.getDataFileName().isPresent()) {
      String expPath = metaClient.getFs()
          .getFileStatus(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), new Path(operation.getDataFileName().get())))
          .getPath()
          .toString();
      ValidationUtils.checkArgument(baseFile.isPresent(), "Data File must be present. File Slice was : " + slice + ", operation :" + operation);
      ValidationUtils.checkArgument(baseFile.get().getPath().equals(expPath), "Base Path in operation is specified as " + expPath + " but got path " + baseFile.get().getPath());
    }

    Set<HoodieLogFile> logFilesInFileSlice = slice.getLogFiles().collect(Collectors.toSet());

    // Resolve each delta file name of the operation to exactly one file status on the file system.
    Set<HoodieLogFile> logFilesInCompactionOp = operation.getDeltaFileNames().stream().map(deltaFileName -> {
      try {
        FileStatus[] statuses = metaClient.getFs().listStatus(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), new Path(deltaFileName)));
        ValidationUtils.checkArgument(statuses.length == 1, "Expect only 1 file-status");
        return new HoodieLogFile(statuses[0]);
      } catch (FileNotFoundException fe) {
        throw new CompactionValidationException(fe.getMessage());
      } catch (IOException ioe) {
        throw new HoodieIOException(ioe.getMessage(), ioe);
      }
    }).collect(Collectors.toSet());

    // Every log file named by the operation must be part of the file slice.
    Set<HoodieLogFile> missing = logFilesInCompactionOp.stream()
        .filter(lf -> !logFilesInFileSlice.contains(lf))
        .collect(Collectors.toSet());
    ValidationUtils.checkArgument(missing.isEmpty(), "All log files specified in compaction operation is not present. Missing :" + missing + ", Exp :" + logFilesInCompactionOp + ", Got :" + logFilesInFileSlice);

    // Log files in the slice but not in the operation must have the compaction instant as base commit time.
    Set<HoodieLogFile> diff = logFilesInFileSlice.stream()
        .filter(lf -> !logFilesInCompactionOp.contains(lf))
        .collect(Collectors.toSet());
    ValidationUtils.checkArgument(diff.stream().allMatch(lf -> lf.getBaseCommitTime().equals(compactionInstant)), "There are some log-files which are neither specified in compaction plan " + "nor present after compaction request instant. Some of these :" + diff);
  } catch (CompactionValidationException | IllegalArgumentException e) {
    return new ValidationOpResult(operation, false, Option.of(e));
  }
  return new ValidationOpResult(operation, true, Option.empty());
}
Aggregations