Use of org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo in project accumulo by apache.
The class BulkImportTest, method addMapping:
private void addMapping(SortedMap<KeyExtent, Files> mappings, String prevRow, String endRow,
    String... fileNames) {
  KeyExtent ke = new KeyExtent(TableId.of("42"), endRow == null ? null : new Text(endRow),
      prevRow == null ? null : new Text(prevRow));
  Files files = new Files();
  for (String name : fileNames) {
    files.add(new FileInfo(name, 2, 2));
  }
  mappings.put(ke, files);
}
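For illustration, here is a minimal standalone sketch of the same idea: building a load mapping entry from Bulk.FileInfo objects, as the helper above does. The table id "42", extent boundary "m", file names, and size/entry estimates are hypothetical, and the imports assume the Accumulo 2.x package layout (KeyExtent under dataImpl).

import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo;
import org.apache.accumulo.core.clientImpl.bulk.Bulk.Files;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.hadoop.io.Text;

public class LoadMappingSketch {
  public static void main(String[] args) {
    SortedMap<KeyExtent, Files> mappings = new TreeMap<>();
    // Tablet covering rows (-inf, "m"] of a hypothetical table with id "42"
    KeyExtent ke = new KeyExtent(TableId.of("42"), new Text("m"), null);
    Files files = new Files();
    // FileInfo(fileName, estimated size, estimated entries) -- values are made up
    files.add(new FileInfo("f1.rf", 2, 2));
    files.add(new FileInfo("f2.rf", 2, 2));
    mappings.put(ke, files);
    System.out.println("tablets mapped: " + mappings.size());
  }
}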
Use of org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo in project accumulo by apache.
The class BulkSerializeTest, method testRemap:
@Test
public void testRemap() throws Exception {
  TableId tableId = TableId.of("3");
  SortedMap<KeyExtent, Bulk.Files> mapping = generateMapping(tableId);
  SortedMap<KeyExtent, Bulk.Files> newNameMapping = new TreeMap<>();

  // Build the expected result: every file renamed with an "N" prefix, plus the
  // old-name-to-new-name map that will be serialized alongside the load mapping.
  Map<String, String> nameMap = new HashMap<>();
  mapping.forEach((extent, files) -> {
    Files newFiles = new Files();
    files.forEach(fi -> {
      newFiles.add(new FileInfo("N" + fi.name, fi.estSize, fi.estEntries));
      nameMap.put(fi.name, "N" + fi.name);
    });
    newNameMapping.put(extent, newFiles);
  });

  // Serialize the rename map and the original load mapping to in-memory streams.
  ByteArrayOutputStream mappingBaos = new ByteArrayOutputStream();
  ByteArrayOutputStream nameBaos = new ByteArrayOutputStream();
  BulkSerialize.writeRenameMap(nameMap, "/some/dir", p -> nameBaos);
  BulkSerialize.writeLoadMapping(mapping, "/some/dir", p -> mappingBaos);

  Input input = p -> {
    if (p.getName().equals(Constants.BULK_LOAD_MAPPING)) {
      return new ByteArrayInputStream(mappingBaos.toByteArray());
    } else if (p.getName().equals(Constants.BULK_RENAME_FILE)) {
      return new ByteArrayInputStream(nameBaos.toByteArray());
    } else {
      throw new IllegalArgumentException("bad path " + p);
    }
  };

  // Reading the mapping back with the rename map applied should yield the renamed files.
  try (LoadMappingIterator lmi = BulkSerialize.getUpdatedLoadMapping("/some/dir", tableId, input)) {
    SortedMap<KeyExtent, Bulk.Files> actual = new TreeMap<>();
    lmi.forEachRemaining(e -> actual.put(e.getKey(), e.getValue()));
    assertEquals(newNameMapping, actual);
  }
}
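Below is a hedged, self-contained sketch of the same serialize/deserialize round trip using only the BulkSerialize calls shown above, but with an identity rename map so the mapping read back should equal the one written. The directory "/bulk/dir", file name "f1.rf", and size/entry estimates are made up; the imports assume the Accumulo 2.x packages and that Input is the functional interface nested in BulkSerialize, as used in the test above.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.clientImpl.bulk.Bulk;
import org.apache.accumulo.core.clientImpl.bulk.BulkSerialize;
import org.apache.accumulo.core.clientImpl.bulk.BulkSerialize.Input;
import org.apache.accumulo.core.clientImpl.bulk.LoadMappingIterator;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.dataImpl.KeyExtent;

public class BulkSerializeRoundTripSketch {
  public static void main(String[] args) throws Exception {
    TableId tableId = TableId.of("3");

    // One default tablet mapped to a single (hypothetical) rfile.
    SortedMap<KeyExtent, Bulk.Files> mapping = new TreeMap<>();
    Bulk.Files files = new Bulk.Files();
    files.add(new Bulk.FileInfo("f1.rf", 10, 100));
    mapping.put(new KeyExtent(tableId, null, null), files);

    // Identity rename map, so the "updated" mapping should equal the original.
    Map<String, String> renames = Map.of("f1.rf", "f1.rf");

    // Serialize both files to in-memory streams, as the test does.
    ByteArrayOutputStream mappingBaos = new ByteArrayOutputStream();
    ByteArrayOutputStream nameBaos = new ByteArrayOutputStream();
    BulkSerialize.writeLoadMapping(mapping, "/bulk/dir", p -> mappingBaos);
    BulkSerialize.writeRenameMap(renames, "/bulk/dir", p -> nameBaos);

    Input input = p -> p.getName().equals(Constants.BULK_LOAD_MAPPING)
        ? new ByteArrayInputStream(mappingBaos.toByteArray())
        : new ByteArrayInputStream(nameBaos.toByteArray());

    try (LoadMappingIterator lmi = BulkSerialize.getUpdatedLoadMapping("/bulk/dir", tableId, input)) {
      lmi.forEachRemaining(e -> System.out.println(e.getKey() + " -> " + e.getValue()));
    }
  }
}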
Use of org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo in project accumulo by apache.
The class BulkImport, method computeMappingFromPlan:
private SortedMap<KeyExtent, Files> computeMappingFromPlan(FileSystem fs, TableId tableId,
    Path srcPath, int maxTablets)
    throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
  Map<String, List<Destination>> fileDestinations =
      plan.getDestinations().stream().collect(groupingBy(Destination::getFileName));
  List<FileStatus> statuses =
      filterInvalid(fs.listStatus(srcPath, p -> !p.getName().equals(Constants.BULK_LOAD_MAPPING)));
  Map<String, Long> fileLens = getFileLenMap(statuses);
  if (!fileDestinations.keySet().equals(fileLens.keySet())) {
    throw new IllegalArgumentException(
        "Load plan files differ from directory files, symmetric difference : "
            + Sets.symmetricDifference(fileDestinations.keySet(), fileLens.keySet()));
  }
  KeyExtentCache extentCache = new ConcurrentKeyExtentCache(tableId, context);
  // Pre-populate cache by looking up all end rows in sorted order. Doing this in sorted order
  // leverages read ahead.
  fileDestinations.values().stream().flatMap(List::stream)
      .filter(dest -> dest.getRangeType() == RangeType.FILE)
      .flatMap(dest -> Stream.of(dest.getStartRow(), dest.getEndRow())).filter(Objects::nonNull)
      .map(Text::new).sorted().distinct().forEach(row -> {
        try {
          extentCache.lookup(row);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });
  SortedMap<KeyExtent, Files> mapping = new TreeMap<>();
  for (Entry<String, List<Destination>> entry : fileDestinations.entrySet()) {
    String fileName = entry.getKey();
    List<Destination> destinations = entry.getValue();
    Set<KeyExtent> extents = mapDestinationsToExtents(tableId, extentCache, destinations);
    log.debug("The file {} mapped to {} tablets.", fileName, extents.size());
    checkTabletCount(maxTablets, extents.size(), fileName);
    // Spread the file's length evenly across the tablets it maps to for the size estimate.
    long estSize = (long) (fileLens.get(fileName) / (double) extents.size());
    for (KeyExtent keyExtent : extents) {
      mapping.computeIfAbsent(keyExtent, k -> new Files()).add(new FileInfo(fileName, estSize, 0));
    }
  }
  return mergeOverlapping(mapping);
}
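For context, here is a short sketch of how a caller might build the LoadPlan that this method consumes and hand it to the bulk import API. The client properties path, table name, directory, file names, and rows are hypothetical, and it assumes the public LoadPlan builder and importDirectory fluent API of Accumulo 2.x, including the loadFileTo overload that accepts row strings; treat the exact overload as an assumption.

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.data.LoadPlan;
import org.apache.accumulo.core.data.LoadPlan.RangeType;

public class LoadPlanSketch {
  public static void main(String[] args) throws Exception {
    // RangeType.FILE ranges give the first and last row stored in each file, which is what
    // the extent-cache pre-population above filters on. Rows and file names are made up.
    LoadPlan plan = LoadPlan.builder()
        .loadFileTo("f1.rf", RangeType.FILE, "row_0001", "row_0003")
        .loadFileTo("f2.rf", RangeType.FILE, "row_0004", "row_0007")
        .build();

    try (AccumuloClient client =
        Accumulo.newClient().from("/path/to/accumulo-client.properties").build()) {
      // Supplying the plan lets the client skip inspecting the rfiles when computing the mapping.
      client.tableOperations().importDirectory("/tmp/bulk").to("mytable").plan(plan).load();
    }
  }
}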