Search in sources :

Example 1 with TabletMetadata

Use of org.apache.accumulo.core.metadata.schema.TabletMetadata in project accumulo by apache.

The class Gatherer, method getFilesGroupedByLocation.

/**
 * Scans the tablet metadata for the configured table and row range, selects the files accepted by {@code fileSelector}, and assigns each selected file to a
 * single tablet server location.
 *
 * <p>
 * The location for a file is chosen in this order: the minimum (by string comparison) current location of the tablets using the file; otherwise the minimum
 * last location of those tablets; otherwise a tablet server picked by hashing the file name over the sorted list of tablet servers.
 *
 * @param fileSelector
 *          only returns files that match this predicate
 * @return A map of the form : {@code map<tserver location, map<path, list<range>>>} . The ranges associated with a file represent the tablets that use the
 *         file.
 * @throws AccumuloException
 *           if a file has no current or last location and no tablet servers are available to assign it to
 */
private Map<String, Map<String, List<TRowRange>>> getFilesGroupedByLocation(Predicate<String> fileSelector) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
    Iterable<TabletMetadata> tmi = MetadataScanner.builder().from(ctx).overUserTableId(tableId, startRow, endRow).fetchFiles().fetchLocation().fetchLast().fetchPrev().build();
    // get a subset of files, remembering every tablet that references each selected file
    Map<String, List<TabletMetadata>> files = new HashMap<>();
    for (TabletMetadata tm : tmi) {
        for (String file : tm.getFiles()) {
            if (fileSelector.test(file)) {
                // TODO push this filtering to server side and possibly use batch scanner
                files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
            }
        }
    }
    // group by location, then file
    Map<String, Map<String, List<TRowRange>>> locations = new HashMap<>();
    // sorted list of tablet servers; fetched lazily, only if some file has no location at all
    List<String> tservers = null;
    for (Entry<String, List<TabletMetadata>> entry : files.entrySet()) {
        List<TabletMetadata> tablets = entry.getValue();
        // Prefer the minimum host:port among current locations. The fallback to last locations is wrapped in orElseGet so it is only computed when no
        // tablet has a current location; it yields null when there is no last location either.
        String location = tablets.stream().filter(tm -> tm.getLocation() != null).map(tm -> tm.getLocation().getHostAndPort().toString()).min(String::compareTo)
            .orElseGet(() -> tablets.stream().filter(tm -> tm.getLast() != null).map(tm -> tm.getLast().getHostAndPort().toString()).min(String::compareTo).orElse(null));
        if (location == null) {
            if (tservers == null) {
                tservers = ctx.getConnector().instanceOperations().getTabletServers();
                Collections.sort(tservers);
            }
            if (tservers.isEmpty()) {
                // without this check the modulo below would fail with an unexplained ArithmeticException
                throw new AccumuloException("No tablet servers available to assign file " + entry.getKey());
            }
            // When no location, the approach below will consistently choose the same tserver for the same file (as long as the set of tservers is stable).
            // The remainder is taken before the absolute value: Math.abs(Integer.MIN_VALUE) is negative, so abs-then-mod could produce a negative index.
            // For every other hash value the result is identical to abs-then-mod.
            int idx = Math.abs(Hashing.murmur3_32().hashString(entry.getKey()).asInt() % tservers.size());
            location = tservers.get(idx);
        }
        // merge contiguous ranges
        List<Range> merged = Range.mergeOverlapping(Lists.transform(tablets, tm -> tm.getExtent().toDataRange()));
        // clip ranges to queried range
        List<TRowRange> ranges = merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());
        locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
    }
    return locations;
}
Also used : ByteSequence(org.apache.accumulo.core.data.ByteSequence) ThriftUtil(org.apache.accumulo.core.rpc.ThriftUtil) FileSystem(org.apache.hadoop.fs.FileSystem) TTransportException(org.apache.thrift.transport.TTransportException) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) Text(org.apache.hadoop.io.Text) TextUtil(org.apache.accumulo.core.util.TextUtil) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Future(java.util.concurrent.Future) TInfo(org.apache.accumulo.core.trace.thrift.TInfo) Map(java.util.Map) TabletClientService(org.apache.accumulo.core.tabletserver.thrift.TabletClientService) Configuration(org.apache.hadoop.conf.Configuration) BlockCache(org.apache.accumulo.core.file.blockfile.cache.BlockCache) Path(org.apache.hadoop.fs.Path) CompletableFutureUtil(org.apache.accumulo.core.util.CompletableFutureUtil) Table(org.apache.accumulo.core.client.impl.Table) CancelFlagFuture(org.apache.accumulo.core.util.CancelFlagFuture) Predicate(java.util.function.Predicate) Set(java.util.Set) TSummaries(org.apache.accumulo.core.data.thrift.TSummaries) Collectors(java.util.stream.Collectors) Tracer(org.apache.accumulo.core.trace.Tracer) List(java.util.List) TRowRange(org.apache.accumulo.core.data.thrift.TRowRange) Entry(java.util.Map.Entry) Pattern(java.util.regex.Pattern) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) ByteBufferUtil(org.apache.accumulo.core.util.ByteBufferUtil) HostAndPort(org.apache.accumulo.core.util.HostAndPort) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ServerClient(org.apache.accumulo.core.client.impl.ServerClient) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) MetadataScanner(org.apache.accumulo.core.metadata.schema.MetadataScanner) Hashing(com.google.common.hash.Hashing) Supplier(java.util.function.Supplier) 
ArrayList(java.util.ArrayList) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) HashSet(java.util.HashSet) Lists(com.google.common.collect.Lists) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) StreamSupport(java.util.stream.StreamSupport) TApplicationException(org.apache.thrift.TApplicationException) ExecutorService(java.util.concurrent.ExecutorService) TSummaryRequest(org.apache.accumulo.core.data.thrift.TSummaryRequest) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) TException(org.apache.thrift.TException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Range(org.apache.accumulo.core.data.Range) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) TreeMap(java.util.TreeMap) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) Preconditions(com.google.common.base.Preconditions) Collections(java.util.Collections) Client(org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Client) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TRowRange(org.apache.accumulo.core.data.thrift.TRowRange) Range(org.apache.accumulo.core.data.Range) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) TRowRange(org.apache.accumulo.core.data.thrift.TRowRange)

Aggregations

Preconditions (com.google.common.base.Preconditions)1 Lists (com.google.common.collect.Lists)1 Hashing (com.google.common.hash.Hashing)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 Set (java.util.Set)1 TreeMap (java.util.TreeMap)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorService (java.util.concurrent.ExecutorService)1 Future (java.util.concurrent.Future)1 TimeUnit (java.util.concurrent.TimeUnit)1 TimeoutException (java.util.concurrent.TimeoutException)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1