Use of org.apache.accumulo.core.data.LoadPlan.Destination in the Apache Accumulo project.
From the class BulkImport, method computeMappingFromPlan.
/**
 * Computes the tablet-extent to files mapping for a bulk import using the load plan held in
 * {@code plan}.
 *
 * <p>The plan's destinations are grouped by file name and compared against the entries listed
 * under {@code srcPath} (the {@code Constants.BULK_LOAD_MAPPING} entry is excluded from the
 * listing, and the listing is passed through {@code filterInvalid}). Each file is then mapped to
 * the extents its destinations resolve to, and the file's length is divided evenly across those
 * extents to produce a per-extent size estimate.
 *
 * @param fs file system used to list {@code srcPath}
 * @param tableId table whose extents are looked up
 * @param srcPath directory whose file names must match the files named by the load plan
 * @param maxTablets passed to {@code checkTabletCount} together with each file's extent count
 * @return the result of {@code mergeOverlapping} applied to the extent-to-files mapping
 * @throws IllegalArgumentException if the file names in the plan and in the directory differ
 * @throws RuntimeException wrapping any exception raised while pre-populating the extent cache
 */
private SortedMap<KeyExtent, Files> computeMappingFromPlan(FileSystem fs, TableId tableId, Path srcPath, int maxTablets) throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
  Map<String, List<Destination>> destinationsByFile =
      plan.getDestinations().stream().collect(groupingBy(Destination::getFileName));

  // Everything under srcPath except the bulk load mapping entry itself.
  List<FileStatus> dirEntries = filterInvalid(
      fs.listStatus(srcPath, path -> !path.getName().equals(Constants.BULK_LOAD_MAPPING)));
  Map<String, Long> lengthByFile = getFileLenMap(dirEntries);

  // The plan must name exactly the files found in the directory.
  Set<String> planFiles = destinationsByFile.keySet();
  Set<String> dirFiles = lengthByFile.keySet();
  if (!planFiles.equals(dirFiles)) {
    throw new IllegalArgumentException("Load plan files differ from directory files, symmetric difference : " + Sets.symmetricDifference(planFiles, dirFiles));
  }

  KeyExtentCache extentCache = new ConcurrentKeyExtentCache(tableId, context);

  // Warm the cache with the non-null start and end rows of every FILE-range destination.
  // Rows are visited sorted and de-duplicated so the lookups can benefit from read ahead.
  Stream<Text> boundaryRows = destinationsByFile.values().stream()
      .flatMap(List::stream)
      .filter(destination -> destination.getRangeType() == RangeType.FILE)
      .flatMap(destination -> Stream.of(destination.getStartRow(), destination.getEndRow()))
      .filter(Objects::nonNull)
      .map(Text::new)
      .sorted()
      .distinct();
  boundaryRows.forEach(boundaryRow -> {
    try {
      extentCache.lookup(boundaryRow);
    } catch (Exception e) {
      // Checked exceptions cannot escape the lambda, so they are rethrown unchecked.
      throw new RuntimeException(e);
    }
  });

  SortedMap<KeyExtent, Files> filesByExtent = new TreeMap<>();
  for (Entry<String, List<Destination>> planEntry : destinationsByFile.entrySet()) {
    String fileName = planEntry.getKey();
    Set<KeyExtent> extents = mapDestinationsToExtents(tableId, extentCache, planEntry.getValue());
    log.debug("The file {} mapped to {} tablets.", fileName, extents.size());
    checkTabletCount(maxTablets, extents.size(), fileName);

    // Spread the file's length evenly over the extents it maps to.
    long estSize = (long) (lengthByFile.get(fileName) / (double) extents.size());
    extents.forEach(extent ->
        filesByExtent.computeIfAbsent(extent, k -> new Files()).add(new FileInfo(fileName, estSize, 0)));
  }

  return mergeOverlapping(filesByExtent);
}
Aggregations