Use of org.apache.accumulo.core.data.thrift.TRowRange in project accumulo by apache.
The class TableOperationsImpl, method summaries().
@Override
public SummaryRetriever summaries(String tableName) {
  return new SummaryRetriever() {

    private Text startRow = null;
    private Text endRow = null;
    private List<TSummarizerConfiguration> summariesToFetch = Collections.emptyList();
    private String summarizerClassRegex;
    private boolean flush = false;

    @Override
    public SummaryRetriever startRow(Text startRow) {
      Objects.requireNonNull(startRow);
      if (endRow != null) {
        Preconditions.checkArgument(startRow.compareTo(endRow) < 0,
            "Start row must be less than end row : %s >= %s", startRow, endRow);
      }
      this.startRow = startRow;
      return this;
    }

    @Override
    public SummaryRetriever startRow(CharSequence startRow) {
      return startRow(new Text(startRow.toString()));
    }

    @Override
    public List<Summary> retrieve() throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
      Table.ID tableId = Tables.getTableId(context.getInstance(), tableName);
      if (Tables.getTableState(context.getInstance(), tableId) == TableState.OFFLINE)
        throw new TableOfflineException(context.getInstance(), tableId.canonicalID());

      TRowRange range = new TRowRange(TextUtil.getByteBuffer(startRow), TextUtil.getByteBuffer(endRow));
      TSummaryRequest request = new TSummaryRequest(tableId.canonicalID(), range, summariesToFetch, summarizerClassRegex);
      if (flush) {
        _flush(tableId, startRow, endRow, true);
      }

      TSummaries ret = ServerClient.execute(context, new TabletClientService.Client.Factory(), client -> {
        TSummaries tsr = client.startGetSummaries(Tracer.traceInfo(), context.rpcCreds(), request);
        while (!tsr.finished) {
          tsr = client.contiuneGetSummaries(Tracer.traceInfo(), tsr.sessionId);
        }
        return tsr;
      });

      return new SummaryCollection(ret).getSummaries();
    }

    @Override
    public SummaryRetriever endRow(Text endRow) {
      Objects.requireNonNull(endRow);
      if (startRow != null) {
        Preconditions.checkArgument(startRow.compareTo(endRow) < 0,
            "Start row must be less than end row : %s >= %s", startRow, endRow);
      }
      this.endRow = endRow;
      return this;
    }

    @Override
    public SummaryRetriever endRow(CharSequence endRow) {
      return endRow(new Text(endRow.toString()));
    }

    @Override
    public SummaryRetriever withConfiguration(Collection<SummarizerConfiguration> configs) {
      Objects.requireNonNull(configs);
      summariesToFetch = configs.stream().map(SummarizerConfigurationUtil::toThrift).collect(Collectors.toList());
      return this;
    }

    @Override
    public SummaryRetriever withConfiguration(SummarizerConfiguration... config) {
      Objects.requireNonNull(config);
      return withConfiguration(Arrays.asList(config));
    }

    @Override
    public SummaryRetriever withMatchingConfiguration(String regex) {
      Objects.requireNonNull(regex);
      // Do a sanity check here to make sure that regex compiles, instead of having it fail on a tserver.
      Pattern.compile(regex);
      this.summarizerClassRegex = regex;
      return this;
    }

    @Override
    public SummaryRetriever flush(boolean b) {
      this.flush = b;
      return this;
    }
  };
}
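For context, the SummaryRetriever returned above is a fluent builder: a caller chains the optional configuration methods and finishes with retrieve(), which issues the startGetSummaries RPC shown in the snippet. A minimal client-side usage sketch follows; the connector setup, table name, and summarizer regex are illustrative assumptions, not part of the excerpt above.

// Hypothetical client-side usage of the summaries() API shown above.
Connector conn = instance.getConnector("user", new PasswordToken("secret")); // assumed connection
List<Summary> summaries = conn.tableOperations().summaries("mytable")
    .startRow("a")                                        // optional: restrict the summaries to a row range
    .endRow("z")
    .withMatchingConfiguration(".*CountingSummarizer.*")  // only summarizers whose class name matches this regex
    .flush(true)                                          // flush in-memory data first so summaries are current
    .retrieve();
for (Summary summary : summaries) {
  System.out.println(summary.getStatistics());            // statistic name -> value
}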
Use of org.apache.accumulo.core.data.thrift.TRowRange in project accumulo by apache.
The class Gatherer, method getFilesGroupedByLocation().
/**
 * @param fileSelector
 *          only returns files that match this predicate
 * @return A map of the form : {@code map<tserver location, map<path, list<range>>>}. The ranges associated with a file represent the tablets that use the
 *         file.
 */
private Map<String,Map<String,List<TRowRange>>> getFilesGroupedByLocation(Predicate<String> fileSelector)
    throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
  Iterable<TabletMetadata> tmi = MetadataScanner.builder().from(ctx).overUserTableId(tableId, startRow, endRow)
      .fetchFiles().fetchLocation().fetchLast().fetchPrev().build();

  // get a subset of files
  Map<String,List<TabletMetadata>> files = new HashMap<>();
  for (TabletMetadata tm : tmi) {
    for (String file : tm.getFiles()) {
      if (fileSelector.test(file)) {
        // TODO push this filtering to server side and possibly use batch scanner
        files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
      }
    }
  }

  // group by location, then file
  Map<String,Map<String,List<TRowRange>>> locations = new HashMap<>();
  List<String> tservers = null;

  for (Entry<String,List<TabletMetadata>> entry : files.entrySet()) {
    String location = entry.getValue().stream()
        .filter(tm -> tm.getLocation() != null)                   // filter tablets w/o a location
        .map(tm -> tm.getLocation().getHostAndPort().toString())  // convert to host:port strings
        .min(String::compareTo)                                   // find minimum host:port
        .orElse(entry.getValue().stream()                         // if no locations, then look at last locations
            .filter(tm -> tm.getLast() != null)
            .map(tm -> tm.getLast().getHostAndPort().toString())  // convert to host:port strings
            .min(String::compareTo)                               // find minimum last location or return null
            .orElse(null));

    if (location == null) {
      if (tservers == null) {
        tservers = ctx.getConnector().instanceOperations().getTabletServers();
        Collections.sort(tservers);
      }
      // When no location, the approach below will consistently choose the same tserver for the same file
      // (as long as the set of tservers is stable).
      int idx = Math.abs(Hashing.murmur3_32().hashString(entry.getKey()).asInt()) % tservers.size();
      location = tservers.get(idx);
    }

    // merge contiguous ranges
    List<Range> merged = Range.mergeOverlapping(Lists.transform(entry.getValue(), tm -> tm.getExtent().toDataRange()));
    // clip ranges to queried range
    List<TRowRange> ranges = merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());

    locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
  }

  return locations;
}
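One design point worth noting in the snippet above: when a file's tablets have neither a current location nor a last location, the file is assigned to a tserver by hashing the file path, so the same file consistently maps to the same server as long as the sorted tserver list does not change. A small standalone sketch of that fallback, using an assumed, hard-coded tserver list and a hypothetical file path:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import com.google.common.hash.Hashing;

public class FallbackLocationSketch {
  public static void main(String[] args) {
    // Illustrative values; the real code sorts the list returned by instanceOperations().getTabletServers().
    List<String> tservers = Arrays.asList("host1:9997", "host2:9997", "host3:9997");
    String file = "hdfs://nn/accumulo/tables/2/default_tablet/F0000abc.rf"; // hypothetical file path
    // Same scheme as above: murmur3_32 of the path, modulo the number of tservers.
    int idx = Math.abs(Hashing.murmur3_32().hashString(file, StandardCharsets.UTF_8).asInt()) % tservers.size();
    System.out.println(file + " -> " + tservers.get(idx));
  }
}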