Search in sources :

Example 1 with TRowRange

Use of org.apache.accumulo.core.data.thrift.TRowRange in the Apache Accumulo project.

From the class TableOperationsImpl, method summaries.

/**
 * Creates a {@link SummaryRetriever} for {@code tableName}. The returned object only records the requested options (row bounds, summarizer selection, flush
 * flag); no work against the table is done until {@code retrieve()} is invoked.
 */
@Override
public SummaryRetriever summaries(String tableName) {
    return new SummaryRetriever() {

        // Row bounds; each stays null until the corresponding setter is called.
        private Text startRow;

        private Text endRow;

        // Thrift form of the explicitly requested summarizer configurations.
        private List<TSummarizerConfiguration> summariesToFetch = Collections.emptyList();

        // Regex supplied via withMatchingConfiguration; null until set.
        private String summarizerClassRegex;

        // When true, retrieve() flushes the row range before asking for summaries.
        private boolean flush;

        @Override
        public SummaryRetriever startRow(CharSequence startRow) {
            Text asText = new Text(startRow.toString());
            return startRow(asText);
        }

        @Override
        public SummaryRetriever startRow(Text startRow) {
            Objects.requireNonNull(startRow);
            // Only validate ordering once the other bound is known.
            Preconditions.checkArgument(endRow == null || startRow.compareTo(endRow) < 0, "Start row must be less than end row : %s >= %s", startRow, endRow);
            this.startRow = startRow;
            return this;
        }

        @Override
        public SummaryRetriever endRow(CharSequence endRow) {
            Text asText = new Text(endRow.toString());
            return endRow(asText);
        }

        @Override
        public SummaryRetriever endRow(Text endRow) {
            Objects.requireNonNull(endRow);
            // Only validate ordering once the other bound is known.
            Preconditions.checkArgument(startRow == null || startRow.compareTo(endRow) < 0, "Start row must be less than end row : %s >= %s", startRow, endRow);
            this.endRow = endRow;
            return this;
        }

        @Override
        public SummaryRetriever withMatchingConfiguration(String regex) {
            Objects.requireNonNull(regex);
            // Compile (and discard) the pattern locally so a malformed regex fails here rather than on a tserver.
            Pattern.compile(regex);
            summarizerClassRegex = regex;
            return this;
        }

        @Override
        public SummaryRetriever withConfiguration(SummarizerConfiguration... config) {
            Objects.requireNonNull(config);
            List<SummarizerConfiguration> asList = Arrays.asList(config);
            return withConfiguration(asList);
        }

        @Override
        public SummaryRetriever withConfiguration(Collection<SummarizerConfiguration> configs) {
            Objects.requireNonNull(configs);
            List<TSummarizerConfiguration> converted = configs.stream().map(SummarizerConfigurationUtil::toThrift).collect(Collectors.toList());
            summariesToFetch = converted;
            return this;
        }

        @Override
        public SummaryRetriever flush(boolean b) {
            flush = b;
            return this;
        }

        /**
         * Resolves the table, rejects offline tables, optionally flushes the row range, then starts a summary session and keeps polling it until the server
         * reports it finished.
         */
        @Override
        public List<Summary> retrieve() throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
            Table.ID tableId = Tables.getTableId(context.getInstance(), tableName);
            boolean offline = Tables.getTableState(context.getInstance(), tableId) == TableState.OFFLINE;
            if (offline) {
                throw new TableOfflineException(context.getInstance(), tableId.canonicalID());
            }

            TRowRange range = new TRowRange(TextUtil.getByteBuffer(startRow), TextUtil.getByteBuffer(endRow));
            TSummaryRequest request = new TSummaryRequest(tableId.canonicalID(), range, summariesToFetch, summarizerClassRegex);

            if (flush) {
                _flush(tableId, startRow, endRow, true);
            }

            TSummaries ret = ServerClient.execute(context, new TabletClientService.Client.Factory(), rpc -> {
                TSummaries partial = rpc.startGetSummaries(Tracer.traceInfo(), context.rpcCreds(), request);
                // Each continue call is keyed by the session id of the previous response.
                while (!partial.finished) {
                    partial = rpc.contiuneGetSummaries(Tracer.traceInfo(), partial.sessionId);
                }
                return partial;
            });

            SummaryCollection collected = new SummaryCollection(ret);
            return collected.getSummaries();
        }
    };
}
Also used : RootTable(org.apache.accumulo.core.metadata.RootTable) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) Text(org.apache.hadoop.io.Text) SummarizerConfigurationUtil(org.apache.accumulo.core.summary.SummarizerConfigurationUtil) SummaryRetriever(org.apache.accumulo.core.client.admin.SummaryRetriever) TSummaryRequest(org.apache.accumulo.core.data.thrift.TSummaryRequest) TSummaries(org.apache.accumulo.core.data.thrift.TSummaries) Summary(org.apache.accumulo.core.client.summary.Summary) SummaryCollection(org.apache.accumulo.core.summary.SummaryCollection) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) Client(org.apache.accumulo.core.client.impl.thrift.ClientService.Client) TRowRange(org.apache.accumulo.core.data.thrift.TRowRange) SummaryCollection(org.apache.accumulo.core.summary.SummaryCollection) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) TSummarizerConfiguration(org.apache.accumulo.core.data.thrift.TSummarizerConfiguration)

Example 2 with TRowRange

Use of org.apache.accumulo.core.data.thrift.TRowRange in the Apache Accumulo project.

From the class Gatherer, method getFilesGroupedByLocation.

/**
 * Scans the tablet metadata for the queried table and row range, keeps the files accepted by {@code fileSelector}, and assigns each file to exactly one
 * location. The location is the smallest (string order) host:port among the current locations of the tablets using the file; if none of those tablets has a
 * current location, the smallest last location is used; if there is none either, a tablet server is picked by hashing the file name.
 *
 * @param fileSelector
 *          only returns files that match this predicate
 * @return A map of the form : {@code map<tserver location, map<path, list<range>>>} . The ranges associated with a file represent the tablets that use the
 *         file.
 * @throws AccumuloException
 *           if a file has neither a current nor a last location and the list of tablet servers is empty
 */
private Map<String, Map<String, List<TRowRange>>> getFilesGroupedByLocation(Predicate<String> fileSelector) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
    Iterable<TabletMetadata> tmi = MetadataScanner.builder().from(ctx).overUserTableId(tableId, startRow, endRow).fetchFiles().fetchLocation().fetchLast().fetchPrev().build();
    // get a subset of files
    Map<String, List<TabletMetadata>> files = new HashMap<>();
    for (TabletMetadata tm : tmi) {
        for (String file : tm.getFiles()) {
            if (fileSelector.test(file)) {
                // TODO push this filtering to server side and possibly use batch scanner
                files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
            }
        }
    }
    // group by location, then file
    Map<String, Map<String, List<TRowRange>>> locations = new HashMap<>();
    List<String> tservers = null;
    for (Entry<String, List<TabletMetadata>> entry : files.entrySet()) {
        List<TabletMetadata> tablets = entry.getValue();
        // Prefer the minimum current location; the last-location fallback is only computed when no tablet has a current location.
        String location = tablets.stream()
            // filter tablets w/o a location
            .filter(tm -> tm.getLocation() != null)
            // convert to host:port strings
            .map(tm -> tm.getLocation().getHostAndPort().toString())
            // find minimum host:port
            .min(String::compareTo)
            // if no locations, then look at last locations; yields null when there are none
            .orElseGet(() -> tablets.stream()
                .filter(tm -> tm.getLast() != null)
                .map(tm -> tm.getLast().getHostAndPort().toString())
                .min(String::compareTo)
                .orElse(null));
        if (location == null) {
            if (tservers == null) {
                tservers = ctx.getConnector().instanceOperations().getTabletServers();
                Collections.sort(tservers);
            }
            if (tservers.isEmpty()) {
                // Without this check the modulo below would fail with an uninformative "/ by zero".
                throw new AccumuloException("No tablet servers available to assign file " + entry.getKey());
            }
            // When no location, the approach below will consistently choose the same tserver for the same file (as long as the set of tservers is stable).
            // The remainder is taken before the absolute value: Math.abs(Integer.MIN_VALUE) is negative, so abs-then-mod could produce a negative index.
            // For every other hash value this yields the same index as abs-then-mod.
            int hash = Hashing.murmur3_32().hashString(entry.getKey()).asInt();
            int idx = Math.abs(hash % tservers.size());
            location = tservers.get(idx);
        }
        // merge contiguous ranges
        List<Range> merged = Range.mergeOverlapping(Lists.transform(tablets, tm -> tm.getExtent().toDataRange()));
        // clip ranges to queried range
        List<TRowRange> ranges = merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());
        locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
    }
    return locations;
}
Also used : ByteSequence(org.apache.accumulo.core.data.ByteSequence) ThriftUtil(org.apache.accumulo.core.rpc.ThriftUtil) FileSystem(org.apache.hadoop.fs.FileSystem) TTransportException(org.apache.thrift.transport.TTransportException) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) Text(org.apache.hadoop.io.Text) TextUtil(org.apache.accumulo.core.util.TextUtil) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Future(java.util.concurrent.Future) TInfo(org.apache.accumulo.core.trace.thrift.TInfo) Map(java.util.Map) TabletClientService(org.apache.accumulo.core.tabletserver.thrift.TabletClientService) Configuration(org.apache.hadoop.conf.Configuration) BlockCache(org.apache.accumulo.core.file.blockfile.cache.BlockCache) Path(org.apache.hadoop.fs.Path) CompletableFutureUtil(org.apache.accumulo.core.util.CompletableFutureUtil) Table(org.apache.accumulo.core.client.impl.Table) CancelFlagFuture(org.apache.accumulo.core.util.CancelFlagFuture) Predicate(java.util.function.Predicate) Set(java.util.Set) TSummaries(org.apache.accumulo.core.data.thrift.TSummaries) Collectors(java.util.stream.Collectors) Tracer(org.apache.accumulo.core.trace.Tracer) List(java.util.List) TRowRange(org.apache.accumulo.core.data.thrift.TRowRange) Entry(java.util.Map.Entry) Pattern(java.util.regex.Pattern) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) ByteBufferUtil(org.apache.accumulo.core.util.ByteBufferUtil) HostAndPort(org.apache.accumulo.core.util.HostAndPort) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ServerClient(org.apache.accumulo.core.client.impl.ServerClient) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) MetadataScanner(org.apache.accumulo.core.metadata.schema.MetadataScanner) Hashing(com.google.common.hash.Hashing) Supplier(java.util.function.Supplier) 
ArrayList(java.util.ArrayList) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) HashSet(java.util.HashSet) Lists(com.google.common.collect.Lists) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) StreamSupport(java.util.stream.StreamSupport) TApplicationException(org.apache.thrift.TApplicationException) ExecutorService(java.util.concurrent.ExecutorService) TSummaryRequest(org.apache.accumulo.core.data.thrift.TSummaryRequest) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) TException(org.apache.thrift.TException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Range(org.apache.accumulo.core.data.Range) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) TreeMap(java.util.TreeMap) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) Preconditions(com.google.common.base.Preconditions) Collections(java.util.Collections) Client(org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Client) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TRowRange(org.apache.accumulo.core.data.thrift.TRowRange) Range(org.apache.accumulo.core.data.Range) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) TRowRange(org.apache.accumulo.core.data.thrift.TRowRange)

Aggregations

ArrayList (java.util.ArrayList)2 List (java.util.List)2 SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)2 TRowRange (org.apache.accumulo.core.data.thrift.TRowRange)2 TSummaries (org.apache.accumulo.core.data.thrift.TSummaries)2 TSummaryRequest (org.apache.accumulo.core.data.thrift.TSummaryRequest)2 Preconditions (com.google.common.base.Preconditions)1 Lists (com.google.common.collect.Lists)1 Hashing (com.google.common.hash.Hashing)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 Set (java.util.Set)1 TreeMap (java.util.TreeMap)1 CompletableFuture (java.util.concurrent.CompletableFuture)1