Use of org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW in project accumulo by apache.
Class PrepBulkImport, method checkForMerge.
private void checkForMerge(final long tid, final Manager manager) throws Exception {
  VolumeManager fs = manager.getVolumeManager();
  final Path bulkDir = new Path(bulkInfo.sourceDir);
  int maxTablets = Integer.parseInt(manager.getContext()
      .getTableConfiguration(bulkInfo.tableId).get(Property.TABLE_BULK_MAX_TABLETS));
  try (LoadMappingIterator lmi =
      BulkSerialize.readLoadMapping(bulkDir.toString(), bulkInfo.tableId, fs::open)) {
    // Lazily walk the table's tablets from startRow onward, fetching only the
    // PREV_ROW column, which is all getExtent() needs to build each KeyExtent.
    TabletIterFactory tabletIterFactory =
        startRow -> TabletsMetadata.builder(manager.getContext()).forTable(bulkInfo.tableId)
            .overlapping(startRow, null).checkConsistency().fetch(PREV_ROW).build().stream()
            .map(TabletMetadata::getExtent).iterator();
    sanityCheckLoadMapping(bulkInfo.tableId.canonical(), lmi, tabletIterFactory, maxTablets, tid);
  }
}
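For context, a minimal standalone sketch of the same PREV_ROW-only metadata read, assuming a ClientContext named ctx and a TableId named tableId are in scope (both names are placeholders, not from the snippet above); fetching only PREV_ROW is enough for getExtent() to reconstruct each tablet's KeyExtent:

// Sketch (assumed names: ctx, tableId): list tablet extents for a table
// by fetching only the PREV_ROW column from the metadata table.
try (TabletsMetadata tablets =
    TabletsMetadata.builder(ctx).forTable(tableId).fetch(PREV_ROW).build()) {
  for (TabletMetadata tm : tablets) {
    System.out.println(tm.getExtent()); // extent = (tableId, endRow, prevEndRow)
  }
}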
Use of org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW in project accumulo by apache.
Class BulkNewIT, method verifyMetadata.
private void verifyMetadata(AccumuloClient client, String tableName,
    Map<String, Set<String>> expectedHashes) {
  Set<String> endRowsSeen = new HashSet<>();
  String id = client.tableOperations().tableIdMap().get(tableName);
  try (TabletsMetadata tablets = TabletsMetadata.builder(client).forTable(TableId.of(id))
      .fetch(FILES, LOADED, PREV_ROW).build()) {
    for (TabletMetadata tablet : tablets) {
      // After the bulk import completes, no tablet should still carry loaded markers.
      assertTrue(tablet.getLoaded().isEmpty());
      Set<String> fileHashes = tablet.getFiles().stream()
          .map(f -> hash(f.getMetaUpdateDelete())).collect(Collectors.toSet());
      String endRow = tablet.getEndRow() == null ? "null" : tablet.getEndRow().toString();
      assertEquals(expectedHashes.get(endRow), fileHashes);
      endRowsSeen.add(endRow);
    }
    // Every expected tablet, and only those, must have been seen.
    assertEquals(expectedHashes.keySet(), endRowsSeen);
  }
}
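The hash helper used above is not part of this excerpt; a plausible stand-in (an assumption, not necessarily the project's exact code) digests each file's bytes so tablets referencing identical files compare equal:

// Hypothetical hash(...) helper: MD5 over the file contents, hex-encoded.
// Requires java.nio.file.Files, java.security.MessageDigest, java.math.BigInteger.
private static String hash(String filename) {
  try {
    byte[] data = Files.readAllBytes(Paths.get(filename.replaceFirst("^file:", "")));
    return new BigInteger(1, MessageDigest.getInstance("MD5").digest(data)).toString(16);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}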
Use of org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW in project accumulo by apache.
Class Merge, method getSizeIterator.
protected Iterator<Size> getSizeIterator(AccumuloClient client, String tablename, Text start,
    Text end) throws MergeException {
  // Open up metadata and walk through the tablets in the requested range.
  TableId tableId;
  TabletsMetadata tablets;
  try {
    ClientContext context = (ClientContext) client;
    tableId = context.getTableId(tablename);
    tablets = TabletsMetadata.builder(context).scanMetadataTable()
        .overRange(new KeyExtent(tableId, end, start).toMetaRange()).fetch(FILES, PREV_ROW)
        .build();
  } catch (Exception e) {
    throw new MergeException(e);
  }
  // Sum the sizes of each tablet's data files to produce one Size per tablet.
  return tablets.stream().map(tm -> {
    long size = tm.getFilesMap().values().stream().mapToLong(DataFileValue::getSize).sum();
    return new Size(tm.getExtent(), size);
  }).iterator();
}
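A hedged usage sketch: since the method is protected, a Merge subclass could use the iterator to total the bytes held by tablets in a row range. The client, table name, and bounds below are placeholders, and Size is assumed to expose its size value to subclasses:

// Sketch (assumed names: client, placeholder bounds "m" and "t"): total the
// file bytes reported for each tablet in the range.
Iterator<Size> sizes = getSizeIterator(client, "mytable", new Text("m"), new Text("t"));
long totalBytes = 0;
while (sizes.hasNext()) {
  totalBytes += sizes.next().size; // assumes the size field is accessible here
}
System.out.printf("bytes in range: %,d%n", totalBytes);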
Use of org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW in project accumulo by apache.
Class Gatherer, method getFilesGroupedByLocation.
/**
 * @param fileSelector
 *          only returns files that match this predicate
 * @return A map of the form {@code map<tserver location, map<path, list<range>>>}. The ranges
 *         associated with a file represent the tablets that use the file.
 */
private Map<String, Map<TabletFile, List<TRowRange>>>
    getFilesGroupedByLocation(Predicate<TabletFile> fileSelector) {
  Iterable<TabletMetadata> tmi = TabletsMetadata.builder(ctx).forTable(tableId)
      .overlapping(startRow, endRow).fetch(FILES, LOCATION, LAST, PREV_ROW).build();

  // get a subset of files
  Map<TabletFile, List<TabletMetadata>> files = new HashMap<>();
  for (TabletMetadata tm : tmi) {
    for (TabletFile file : tm.getFiles()) {
      if (fileSelector.test(file)) {
        // TODO push this filtering to the server side and possibly use a batch scanner
        files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
      }
    }
  }

  // group by location, then file
  Map<String, Map<TabletFile, List<TRowRange>>> locations = new HashMap<>();
  List<String> tservers = null;
  for (Entry<TabletFile, List<TabletMetadata>> entry : files.entrySet()) {
    // Prefer the minimum current location; fall back to the minimum last known
    // location; otherwise null.
    String location = entry.getValue().stream()
        .filter(tm -> tm.getLocation() != null)      // tablets with a current location
        .map(tm -> tm.getLocation().getHostPort())   // convert to host:port strings
        .min(String::compareTo)                      // find minimum host:port
        .orElse(entry.getValue().stream()
            .filter(tm -> tm.getLast() != null)
            .map(tm -> tm.getLast().getHostPort())
            .min(String::compareTo)                  // minimum last location, or null
            .orElse(null));

    if (location == null) {
      if (tservers == null) {
        tservers = ctx.instanceOperations().getTabletServers();
        Collections.sort(tservers);
      }
      // When there is no location, the approach below will consistently choose the same
      // tserver for the same file (as long as the set of tservers is stable).
      int idx = Math.abs(Hashing.murmur3_32_fixed()
          .hashString(entry.getKey().getPathStr(), UTF_8).asInt()) % tservers.size();
      location = tservers.get(idx);
    }

    // merge contiguous ranges
    List<Range> merged = Range
        .mergeOverlapping(Lists.transform(entry.getValue(), tm -> tm.getExtent().toDataRange()));
    // clip ranges to the queried range
    List<TRowRange> ranges =
        merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());

    locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
  }
  return locations;
}
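The no-location fallback above boils down to deterministic hashing over a sorted server list; a standalone sketch of just that idea (hypothetical method name, Guava's Hashing and a UTF_8 static import assumed on the classpath):

// Sketch: deterministically map a file path to one of the sorted tservers.
// Same path + same server list => same choice, so per-file work does not
// bounce between servers across calls.
static String pickServerForFile(List<String> sortedTservers, String pathStr) {
  int idx = Math.abs(Hashing.murmur3_32_fixed().hashString(pathStr, UTF_8).asInt())
      % sortedTservers.size();
  return sortedTservers.get(idx);
}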