Search in sources :

Example 1 with FILES

use of org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES in project accumulo by apache.

the class BulkNewIT method verifyMetadata.

/**
 * Checks the tablet metadata of {@code tableName} against the expected file hashes.
 *
 * <p>
 * For every tablet of the table this asserts that the tablet has no loaded entries and that the
 * set of hashes computed from its files equals the set registered in {@code expectedHashes}
 * under the tablet's end row. After all tablets were visited it asserts that the end rows
 * encountered are exactly the keys of {@code expectedHashes}.
 *
 * @param client
 *          client used to resolve the table id and to read the tablet metadata
 * @param tableName
 *          name of the table whose tablets are verified
 * @param expectedHashes
 *          expected file hashes keyed by tablet end row; a tablet without an end row is looked up
 *          under the literal key {@code "null"}
 */
private void verifyMetadata(AccumuloClient client, String tableName, Map<String, Set<String>> expectedHashes) {
    Set<String> visitedEndRows = new HashSet<>();
    String tableIdStr = client.tableOperations().tableIdMap().get(tableName);
    try (TabletsMetadata tabletsMeta = TabletsMetadata.builder(client).forTable(TableId.of(tableIdStr)).fetch(FILES, LOADED, PREV_ROW).build()) {
        for (TabletMetadata tabletMeta : tabletsMeta) {
            // no tablet may still carry loaded entries
            assertTrue(tabletMeta.getLoaded().isEmpty());

            Set<String> actualHashes = tabletMeta.getFiles().stream()
                .map(file -> hash(file.getMetaUpdateDelete()))
                .collect(Collectors.toSet());

            // a tablet without an end row is keyed by the string "null"
            final String endRowKey;
            if (tabletMeta.getEndRow() == null) {
                endRowKey = "null";
            } else {
                endRowKey = tabletMeta.getEndRow().toString();
            }

            assertEquals(expectedHashes.get(endRowKey), actualHashes);
            visitedEndRows.add(endRowKey);
        }
        // every expected end row must have been seen, and nothing else
        assertEquals(expectedHashes.keySet(), visitedEndRows);
    }
}
Also used : TableId(org.apache.accumulo.core.data.TableId) Arrays(java.util.Arrays) SortedSet(java.util.SortedSet) FileSystem(org.apache.hadoop.fs.FileSystem) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) Text(org.apache.hadoop.io.Text) FsPermission(org.apache.hadoop.fs.permission.FsPermission) FileOperations(org.apache.accumulo.core.file.FileOperations) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) BigInteger(java.math.BigInteger) Value(org.apache.accumulo.core.data.Value) PREV_ROW(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW) Property(org.apache.accumulo.core.conf.Property) LoadPlan(org.apache.accumulo.core.data.LoadPlan) ServerType(org.apache.accumulo.minicluster.ServerType) AfterClass(org.junit.AfterClass) Set(java.util.Set) TimeType(org.apache.accumulo.core.client.admin.TimeType) MiniClusterConfigurationCallback(org.apache.accumulo.harness.MiniClusterConfigurationCallback) RangeType(org.apache.accumulo.core.data.LoadPlan.RangeType) LOADED(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOADED) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) AccumuloClient(org.apache.accumulo.core.client.AccumuloClient) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) Entry(java.util.Map.Entry) Scanner(org.apache.accumulo.core.client.Scanner) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) BeforeClass(org.junit.BeforeClass) MessageDigest(java.security.MessageDigest) Assert.assertThrows(org.junit.Assert.assertThrows) HashMap(java.util.HashMap) Accumulo(org.apache.accumulo.core.client.Accumulo) TreeSet(java.util.TreeSet) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) MemoryUnit(org.apache.accumulo.minicluster.MemoryUnit) HashSet(java.util.HashSet) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) 
FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Key(org.apache.accumulo.core.data.Key) MiniAccumuloConfigImpl(org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl) FILES(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES) Before(org.junit.Before) Iterator(java.util.Iterator) Files(java.nio.file.Files) RawLocalFileSystem(org.apache.hadoop.fs.RawLocalFileSystem) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) Authorizations(org.apache.accumulo.core.security.Authorizations) CryptoServiceFactory(org.apache.accumulo.core.crypto.CryptoServiceFactory) AccumuloException(org.apache.accumulo.core.client.AccumuloException) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) ExecutionException(java.util.concurrent.ExecutionException) SharedMiniClusterBase(org.apache.accumulo.harness.SharedMiniClusterBase) RFile(org.apache.accumulo.core.file.rfile.RFile) Paths(java.nio.file.Paths) Assert.assertEquals(org.junit.Assert.assertEquals) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) HashSet(java.util.HashSet)

Example 2 with FILES

use of org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES in project accumulo by apache.

the class Merge method getSizeIterator.

/**
 * Returns an iterator of {@link Size} entries, one per tablet read from the metadata table for
 * the requested range of {@code tablename}.
 *
 * <p>
 * Each entry pairs the tablet's extent with the sum of the sizes of the files listed for that
 * tablet.
 *
 * @param client
 *          the client; it is cast to {@code ClientContext}
 * @param tablename
 *          name of the table whose tablets are inspected
 * @param start
 *          passed as the third argument of the {@code KeyExtent} that defines the scanned range
 * @param end
 *          passed as the second argument of the {@code KeyExtent} that defines the scanned range
 * @return iterator over the extent and total file size of each tablet
 * @throws MergeException
 *           wrapping any exception raised while resolving the table id or building the metadata
 *           reader
 */
protected Iterator<Size> getSizeIterator(AccumuloClient client, String tablename, Text start, Text end) throws MergeException {
    TabletsMetadata rangeTablets;
    try {
        // resolve the table and set up a metadata-table read restricted to the requested range
        ClientContext clientContext = (ClientContext) client;
        TableId resolvedId = clientContext.getTableId(tablename);
        rangeTablets = TabletsMetadata.builder(clientContext)
            .scanMetadataTable()
            .overRange(new KeyExtent(resolvedId, end, start).toMetaRange())
            .fetch(FILES, PREV_ROW)
            .build();
    } catch (Exception e) {
        throw new MergeException(e);
    }

    return rangeTablets.stream().map(tabletMeta -> {
        // add up the size of every file referenced by this tablet
        long totalBytes = 0;
        for (DataFileValue fileValue : tabletMeta.getFilesMap().values()) {
            totalBytes += fileValue.getSize();
        }
        return new Size(tabletMeta.getExtent(), totalBytes);
    }).iterator();
}
Also used : TableId(org.apache.accumulo.core.data.TableId) TableId(org.apache.accumulo.core.data.TableId) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Parameter(com.beust.jcommander.Parameter) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) LoggerFactory(org.slf4j.LoggerFactory) Text(org.apache.hadoop.io.Text) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) Accumulo(org.apache.accumulo.core.client.Accumulo) ArrayList(java.util.ArrayList) ConfigurationTypeHelper(org.apache.accumulo.core.conf.ConfigurationTypeHelper) IStringConverter(com.beust.jcommander.IStringConverter) PREV_ROW(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW) FILES(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES) Property(org.apache.accumulo.core.conf.Property) ClientOpts(org.apache.accumulo.core.cli.ClientOpts) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Span(io.opentelemetry.api.trace.Span) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) Scope(io.opentelemetry.context.Scope) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) AccumuloClient(org.apache.accumulo.core.client.AccumuloClient) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) List(java.util.List) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) TraceUtil(org.apache.accumulo.core.trace.TraceUtil) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent)

Example 3 with FILES

use of org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES in project accumulo by apache.

the class Gatherer method getFilesGroupedByLocation.

/**
 * Collects the files of all tablets overlapping the queried row range that match
 * {@code fileSelector} and groups them by the tablet server chosen to handle them.
 *
 * <p>
 * For each file the server is chosen as follows: the smallest host:port among the current
 * locations of the tablets using the file; if none of them has a current location, the smallest
 * host:port among their last locations; if that is also missing, a server picked from the sorted
 * list of live tablet servers by hashing the file path.
 *
 * @param fileSelector
 *          only returns files that match this predicate
 * @return A map of the form : {@code map<tserver location, map<path, list<range>>>} . The ranges
 *         associated with a file represent the tablets that use the file.
 */
private Map<String, Map<TabletFile, List<TRowRange>>> getFilesGroupedByLocation(Predicate<TabletFile> fileSelector) {
    // get a subset of files
    Map<TabletFile, List<TabletMetadata>> files = new HashMap<>();
    // close the metadata reader as soon as all tablets have been consumed
    try (TabletsMetadata tmi = TabletsMetadata.builder(ctx).forTable(tableId).overlapping(startRow, endRow).fetch(FILES, LOCATION, LAST, PREV_ROW).build()) {
        for (TabletMetadata tm : tmi) {
            for (TabletFile file : tm.getFiles()) {
                if (fileSelector.test(file)) {
                    // TODO push this filtering to server side and possibly use batch scanner
                    files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
                }
            }
        }
    }

    // group by location, then file
    Map<String, Map<TabletFile, List<TRowRange>>> locations = new HashMap<>();
    List<String> tservers = null;
    for (Entry<TabletFile, List<TabletMetadata>> entry : files.entrySet()) {
        // prefer the minimum host:port among the current locations of the tablets using the file
        String location = entry.getValue().stream()
            .filter(tm -> tm.getLocation() != null)
            .map(tm -> tm.getLocation().getHostPort())
            .min(String::compareTo)
            .orElse(null);

        if (location == null) {
            // no current location: fall back to the minimum host:port among the last locations
            // (only computed when actually needed)
            location = entry.getValue().stream()
                .filter(tm -> tm.getLast() != null)
                .map(tm -> tm.getLast().getHostPort())
                .min(String::compareTo)
                .orElse(null);
        }

        if (location == null) {
            if (tservers == null) {
                tservers = ctx.instanceOperations().getTabletServers();
                Collections.sort(tservers);
            }
            // When no location, the approach below will consistently choose the same tserver for the
            // same file (as long as the set of tservers is stable).
            int pathHash = Hashing.murmur3_32_fixed().hashString(entry.getKey().getPathStr(), UTF_8).asInt();
            // Take the remainder before the absolute value: Math.abs(Integer.MIN_VALUE) is still
            // negative and would produce a negative index. For every other hash value this yields
            // the same index as Math.abs(hash) % size.
            int idx = Math.abs(pathHash % tservers.size());
            location = tservers.get(idx);
        }

        // merge contiguous ranges
        List<Range> merged = Range.mergeOverlapping(Lists.transform(entry.getValue(), tm -> tm.getExtent().toDataRange()));
        // clip ranges to queried range
        List<TRowRange> ranges = merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());

        locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
    }
    return locations;
}
Also used : TableId(org.apache.accumulo.core.data.TableId) ByteSequence(org.apache.accumulo.core.data.ByteSequence) ThriftUtil(org.apache.accumulo.core.rpc.ThriftUtil) FileSystem(org.apache.hadoop.fs.FileSystem) TTransportException(org.apache.thrift.transport.TTransportException) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) Text(org.apache.hadoop.io.Text) TextUtil(org.apache.accumulo.core.util.TextUtil) BlockCache(org.apache.accumulo.core.spi.cache.BlockCache) Future(java.util.concurrent.Future) TSummaries(org.apache.accumulo.core.dataImpl.thrift.TSummaries) TInfo(org.apache.accumulo.core.trace.thrift.TInfo) Map(java.util.Map) TabletClientService(org.apache.accumulo.core.tabletserver.thrift.TabletClientService) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) CompletableFutureUtil(org.apache.accumulo.core.util.CompletableFutureUtil) PREV_ROW(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW) CancelFlagFuture(org.apache.accumulo.core.util.CancelFlagFuture) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) TSummaryRequest(org.apache.accumulo.core.dataImpl.thrift.TSummaryRequest) Predicate(java.util.function.Predicate) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Entry(java.util.Map.Entry) Pattern(java.util.regex.Pattern) TraceUtil(org.apache.accumulo.core.trace.TraceUtil) ByteBufferUtil(org.apache.accumulo.core.util.ByteBufferUtil) HostAndPort(org.apache.accumulo.core.util.HostAndPort) NANOSECONDS(java.util.concurrent.TimeUnit.NANOSECONDS) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) ServerClient(org.apache.accumulo.core.clientImpl.ServerClient) TRowRange(org.apache.accumulo.core.dataImpl.thrift.TRowRange) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) 
CompletableFuture(java.util.concurrent.CompletableFuture) Hashing(com.google.common.hash.Hashing) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) HashSet(java.util.HashSet) Lists(com.google.common.collect.Lists) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) LAST(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LAST) TabletFile(org.apache.accumulo.core.metadata.TabletFile) TApplicationException(org.apache.thrift.TApplicationException) FILES(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES) ExecutorService(java.util.concurrent.ExecutorService) LOCATION(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) CryptoService(org.apache.accumulo.core.spi.crypto.CryptoService) UTF_8(java.nio.charset.StandardCharsets.UTF_8) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TException(org.apache.thrift.TException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Range(org.apache.accumulo.core.data.Range) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) TreeMap(java.util.TreeMap) Preconditions(com.google.common.base.Preconditions) Cache(com.google.common.cache.Cache) Collections(java.util.Collections) Client(org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Client) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TRowRange(org.apache.accumulo.core.dataImpl.thrift.TRowRange) Range(org.apache.accumulo.core.data.Range) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) TabletFile(org.apache.accumulo.core.metadata.TabletFile) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) 
HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) TRowRange(org.apache.accumulo.core.dataImpl.thrift.TRowRange)

Aggregations

Iterator (java.util.Iterator)3 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)3 TableId (org.apache.accumulo.core.data.TableId)3 FILES (org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES)3 PREV_ROW (org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2 Set (java.util.Set)2 ExecutionException (java.util.concurrent.ExecutionException)2 ClientContext (org.apache.accumulo.core.clientImpl.ClientContext)2 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)2 TabletsMetadata (org.apache.accumulo.core.metadata.schema.TabletsMetadata)2 TraceUtil (org.apache.accumulo.core.trace.TraceUtil)2 Text (org.apache.hadoop.io.Text)2 Logger (org.slf4j.Logger)2 LoggerFactory (org.slf4j.LoggerFactory)2