
Example 16 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in the Apache Accumulo project.

From the class CompactableUtils, the method computeCompactionFileDest:

public static TabletFile computeCompactionFileDest(TabletFile tmpFile) {
    String newFilePath = tmpFile.getMetaInsert();
    int idx = newFilePath.indexOf("_tmp");
    if (idx > 0) {
        newFilePath = newFilePath.substring(0, idx);
    } else {
        throw new IllegalArgumentException("Expected compaction tmp file " + tmpFile.getMetaInsert() + " to have suffix '_tmp'");
    }
    return new TabletFile(new Path(newFilePath));
}
Also used : Path(org.apache.hadoop.fs.Path) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile)
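
The logic is simple enough to lift out: strip a trailing "_tmp" marker, or fail fast. Below is a minimal, self-contained sketch of the same idea using a plain String helper; the class and method names (TmpSuffix, stripTmpSuffix) are invented for illustration and are not part of Accumulo.

// Hypothetical standalone helper mirroring computeCompactionFileDest:
// strip a trailing "_tmp" marker from a compaction file path, failing fast otherwise.
// Like the original, this keys off the first occurrence of "_tmp" in the path.
public final class TmpSuffix {

    static String stripTmpSuffix(String path) {
        int idx = path.indexOf("_tmp");
        if (idx <= 0) {
            throw new IllegalArgumentException(
                "Expected compaction tmp file " + path + " to have suffix '_tmp'");
        }
        return path.substring(0, idx);
    }

    public static void main(String[] args) {
        // prints /accumulo/tables/1/t-0001/F0001.rf
        System.out.println(stripTmpSuffix("/accumulo/tables/1/t-0001/F0001.rf_tmp"));
    }
}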

Example 17 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in the Apache Accumulo project.

From the class ScanDataSource, the method createIterator:

private SortedKeyValueIterator<Key, Value> createIterator() throws IOException {
    Map<TabletFile, DataFileValue> files;
    SamplerConfigurationImpl samplerConfig = scanParams.getSamplerConfigurationImpl();
    synchronized (tablet) {
        if (memIters != null)
            throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");
        if (tablet.isClosed())
            throw new TabletClosedException();
        if (interruptFlag.get())
            throw new IterationInterruptedException(tablet.getExtent() + " " + interruptFlag.hashCode());
        // only acquire the file manager when we know the tablet is open
        if (fileManager == null) {
            fileManager = tablet.getTabletResources().newScanFileManager(scanParams.getScanDispatch());
            tablet.getTabletServer().getScanMetrics().incrementOpenFiles(fileManager.getNumOpenFiles());
            tablet.addActiveScans(this);
        }
        if (fileManager.getNumOpenFiles() != 0)
            throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");
        // set this before trying to get iterators in case
        // getIterators() throws an exception
        expectedDeletionCount = tablet.getDataSourceDeletions();
        memIters = tablet.getTabletMemory().getIterators(samplerConfig);
        Pair<Long, Map<TabletFile, DataFileValue>> reservation = tablet.getDatafileManager().reserveFilesForScan();
        fileReservationId = reservation.getFirst();
        files = reservation.getSecond();
    }
    Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, scanParams.isIsolated(), samplerConfig);
    for (SortedKeyValueIterator<Key, Value> skvi : Iterables.concat(mapfiles, memIters))
        ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(mapfiles.size() + memIters.size());
    iters.addAll(mapfiles);
    iters.addAll(memIters);
    MultiIterator multiIter = new MultiIterator(iters, tablet.getExtent());
    TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(tablet.getTabletServer().getContext(), IteratorScope.scan, tablet.getTableConfiguration(), tablet.getExtent().tableId(), fileManager, files, scanParams.getAuthorizations(), samplerConfig, new ArrayList<>());
    statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, tablet.getScannedCounter());
    SortedKeyValueIterator<Key, Value> visFilter = SystemIteratorUtil.setupSystemScanIterators(statsIterator, scanParams.getColumnSet(), scanParams.getAuthorizations(), defaultLabels, tablet.getTableConfiguration());
    if (loadIters) {
        List<IterInfo> iterInfos;
        Map<String, Map<String, String>> iterOpts;
        ParsedIteratorConfig pic = tablet.getTableConfiguration().getParsedIteratorConfig(IteratorScope.scan);
        if (scanParams.getSsiList().isEmpty() && scanParams.getSsio().isEmpty()) {
            // No scan time iterator options were set, so can just use the pre-parsed table iterator
            // options.
            iterInfos = pic.getIterInfo();
            iterOpts = pic.getOpts();
        } else {
            // Scan time iterator options were set, so need to merge those with pre-parsed table
            // iterator options.
            iterOpts = new HashMap<>(pic.getOpts().size() + scanParams.getSsio().size());
            iterInfos = new ArrayList<>(pic.getIterInfo().size() + scanParams.getSsiList().size());
            IterConfigUtil.mergeIteratorConfig(iterInfos, iterOpts, pic.getIterInfo(), pic.getOpts(), scanParams.getSsiList(), scanParams.getSsio());
        }
        String context;
        if (scanParams.getClassLoaderContext() != null) {
            log.trace("Loading iterators for scan with scan context: {}", scanParams.getClassLoaderContext());
            context = scanParams.getClassLoaderContext();
        } else {
            context = pic.getServiceEnv();
            if (context != null) {
                log.trace("Loading iterators for scan with table context: {}", scanParams.getClassLoaderContext());
            } else {
                log.trace("Loading iterators for scan");
            }
        }
        IterLoad il = new IterLoad().iters(iterInfos).iterOpts(iterOpts).iterEnv(iterEnv).useAccumuloClassLoader(true).context(context);
        return iterEnv.getTopLevelIterator(IterConfigUtil.loadIterators(visFilter, il));
    } else {
        return visFilter;
    }
}
Also used : SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) ArrayList(java.util.ArrayList) InterruptibleIterator(org.apache.accumulo.core.iteratorsImpl.system.InterruptibleIterator) IterInfo(org.apache.accumulo.core.dataImpl.thrift.IterInfo) TabletIteratorEnvironment(org.apache.accumulo.server.iterators.TabletIteratorEnvironment) IterationInterruptedException(org.apache.accumulo.core.iteratorsImpl.system.IterationInterruptedException) TabletFile(org.apache.accumulo.core.metadata.TabletFile) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) StatsIterator(org.apache.accumulo.core.iteratorsImpl.system.StatsIterator) IterLoad(org.apache.accumulo.core.conf.IterLoad) Value(org.apache.accumulo.core.data.Value) ParsedIteratorConfig(org.apache.accumulo.server.conf.TableConfiguration.ParsedIteratorConfig) HashMap(java.util.HashMap) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)
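
The branch worth calling out is the merge: when scan-time iterators are supplied, their options must be layered over the pre-parsed table options. Here is a minimal sketch of that layering under a simple map-of-maps assumption; the names IterOptsMerge and merge are invented, and this does not reproduce Accumulo's IterConfigUtil.mergeIteratorConfig.

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch: layer scan-time iterator options over table options,
// with scan-time values winning on key conflicts.
final class IterOptsMerge {

    static Map<String, Map<String, String>> merge(Map<String, Map<String, String>> tableOpts,
            Map<String, Map<String, String>> scanOpts) {
        // size hint mirrors the snippet's new HashMap<>(pic.getOpts().size() + ssio.size())
        Map<String, Map<String, String>> merged = new HashMap<>(tableOpts.size() + scanOpts.size());
        // copy the table options first
        tableOpts.forEach((iterName, opts) -> merged.put(iterName, new HashMap<>(opts)));
        // then overlay the scan-time options for each iterator
        scanOpts.forEach((iterName, opts) ->
            merged.computeIfAbsent(iterName, k -> new HashMap<>()).putAll(opts));
        return merged;
    }
}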

Example 18 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in the Apache Accumulo project.

From the class MinorCompactionTask, the method run:

@Override
public void run() {
    tablet.minorCompactionStarted();
    try {
        Span span = TraceUtil.startSpan(this.getClass(), "minorCompaction");
        try (Scope scope = span.makeCurrent()) {
            Span span2 = TraceUtil.startSpan(this.getClass(), "waitForCommits");
            try (Scope scope2 = span2.makeCurrent()) {
                synchronized (tablet) {
                    commitSession.waitForCommitsToFinish();
                }
            } catch (Exception e) {
                TraceUtil.setException(span2, e, true);
                throw e;
            } finally {
                span2.end();
            }
            TabletFile newFile = null;
            TabletFile tmpFile = null;
            Span span3 = TraceUtil.startSpan(this.getClass(), "start");
            try (Scope scope3 = span3.makeCurrent()) {
                while (true) {
                    try {
                        if (newFile == null) {
                            newFile = tablet.getNextMapFilename("F");
                            tmpFile = new TabletFile(new Path(newFile.getPathStr() + "_tmp"));
                        }
                        /*
                         * the purpose of the minor compaction start event is to keep track of the filename...
                         * in the case where the metadata table write for the minor compaction finishes and
                         * the process dies before writing the minor compaction finish event, then the start
                         * event+filename in metadata table will prevent recovery of duplicate data... the
                         * minor compaction start event could be written at any time before the metadata write
                         * for the minor compaction
                         */
                        tablet.getTabletServer().minorCompactionStarted(commitSession, commitSession.getWALogSeq() + 1, newFile.getMetaInsert());
                        break;
                    } catch (IOException e) {
                        // An IOException could have occurred while creating the new file
                        if (newFile == null)
                            log.warn("Failed to create new file for minor compaction {}", e.getMessage(), e);
                        else
                            log.warn("Failed to write to write ahead log {}", e.getMessage(), e);
                    }
                }
            } catch (Exception e) {
                TraceUtil.setException(span3, e, true);
                throw e;
            } finally {
                span3.end();
            }
            Span span4 = TraceUtil.startSpan(this.getClass(), "compact");
            try (Scope scope4 = span4.makeCurrent()) {
                this.stats = tablet.minorCompact(tablet.getTabletMemory().getMinCMemTable(), tmpFile, newFile, queued, commitSession, flushId, mincReason);
            } catch (Exception e) {
                TraceUtil.setException(span4, e, true);
                throw e;
            } finally {
                span4.end();
            }
            span.setAttribute("extent", tablet.getExtent().toString());
            span.setAttribute("numEntries", Long.toString(this.stats.getNumEntries()));
            span.setAttribute("size", Long.toString(this.stats.getSize()));
        } catch (Exception e) {
            TraceUtil.setException(span, e, true);
            throw e;
        } finally {
            span.end();
        }
        if (tablet.needsSplit()) {
            tablet.getTabletServer().executeSplit(tablet);
        }
    } catch (Exception e) {
        log.error("Unknown error during minor compaction for extent: {}", tablet.getExtent(), e);
        throw e;
    } finally {
        tablet.minorCompactionComplete();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Scope(io.opentelemetry.context.Scope) TabletFile(org.apache.accumulo.core.metadata.TabletFile) IOException(java.io.IOException) Span(io.opentelemetry.api.trace.Span)
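
All of the span bookkeeping above goes through Accumulo's TraceUtil wrapper; underneath it is the standard OpenTelemetry start/makeCurrent/end idiom. A minimal sketch of that idiom follows, assuming an OpenTelemetry SDK is registered globally; the tracer name "example" and the class SpanIdiom are invented.

import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.context.Scope;

// Hypothetical sketch of the span lifecycle used by MinorCompactionTask:
// start a span, make it current for the duration of the work, record any
// exception, and always end the span in a finally block.
final class SpanIdiom {

    static void traced(Runnable work) {
        Tracer tracer = GlobalOpenTelemetry.getTracer("example");
        Span span = tracer.spanBuilder("minorCompaction").startSpan();
        try (Scope scope = span.makeCurrent()) {
            work.run();
        } catch (RuntimeException e) {
            span.recordException(e); // roughly what TraceUtil.setException does
            throw e;
        } finally {
            span.end();
        }
    }
}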

Example 19 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in the Apache Accumulo project.

From the class FileUtil, the method findMidPoint:

/**
 * @param mapFiles
 *          - the map files in which to find the mid point key
 *
 *          ISSUES: This method uses the index files to find the mid point. If the map files have
 *          different index intervals, this method will not return an accurate mid point. Also, it
 *          would be tricky to use this method in conjunction with an in-memory map because the
 *          indexing interval is unknown.
 */
public static SortedMap<Double, Key> findMidPoint(ServerContext context, String tabletDirectory, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, double minSplit, boolean useIndex) throws IOException {
    Collection<TabletFile> origMapFiles = mapFiles;
    Path tmpDir = null;
    int maxToOpen = context.getConfiguration().getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
    ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());
    try {
        if (mapFiles.size() > maxToOpen) {
            if (!useIndex)
                throw new IOException("Cannot find mid point using data files, too many " + mapFiles.size());
            tmpDir = createTmpDir(context, tabletDirectory);
            log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}", mapFiles.size(), endRow, prevEndRow, tmpDir);
            long t1 = System.currentTimeMillis();
            mapFiles = reduceFiles(context, context.getHadoopConf(), prevEndRow, endRow, mapFiles, maxToOpen, tmpDir, 0);
            long t2 = System.currentTimeMillis();
            log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow, String.format("%6.2f secs", (t2 - t1) / 1000.0));
        }
        if (prevEndRow == null)
            prevEndRow = new Text();
        long t1 = System.currentTimeMillis();
        long numKeys = countIndexEntries(context, prevEndRow, endRow, mapFiles, tmpDir == null ? useIndex : false, readers);
        if (numKeys == 0) {
            if (useIndex) {
                log.warn("Failed to find mid point using indexes, falling back to" + " data files which is slower. No entries between {} and {} for {}", prevEndRow, endRow, mapFiles);
                // need to pass original map files, not possibly reduced indexes
                return findMidPoint(context, tabletDirectory, prevEndRow, endRow, origMapFiles, minSplit, false);
            }
            return Collections.emptySortedMap();
        }
        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(readers);
        MultiIterator mmfi = new MultiIterator(iters, true);
        // skip the prevEndRow
        while (mmfi.hasTop() && mmfi.getTopKey().compareRow(prevEndRow) <= 0)
            mmfi.next();
        // read half of the keys in the index
        TreeMap<Double, Key> ret = new TreeMap<>();
        Key lastKey = null;
        long keysRead = 0;
        Key keyBeforeMidPoint = null;
        long keyBeforeMidPointPosition = 0;
        while (keysRead < numKeys / 2) {
            if (lastKey != null && !lastKey.equals(mmfi.getTopKey(), PartialKey.ROW) && (keysRead - 1) / (double) numKeys >= minSplit) {
                keyBeforeMidPoint = new Key(lastKey);
                keyBeforeMidPointPosition = keysRead - 1;
            }
            if (lastKey == null)
                lastKey = new Key();
            lastKey.set(mmfi.getTopKey());
            keysRead++;
            // consume minimum
            mmfi.next();
        }
        if (keyBeforeMidPoint != null)
            ret.put(keyBeforeMidPointPosition / (double) numKeys, keyBeforeMidPoint);
        long t2 = System.currentTimeMillis();
        log.debug(String.format("Found midPoint from indexes in %6.2f secs.%n", ((t2 - t1) / 1000.0)));
        ret.put(.5, mmfi.getTopKey());
        // sanity check
        for (Key key : ret.values()) {
            boolean inRange = (key.compareRow(prevEndRow) > 0 && (endRow == null || key.compareRow(endRow) < 1));
            if (!inRange) {
                throw new IOException("Found mid point is not in range " + key + " " + prevEndRow + " " + endRow + " " + mapFiles);
            }
        }
        return ret;
    } finally {
        cleanupIndexOp(tmpDir, context.getVolumeManager(), readers);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TreeMap(java.util.TreeMap) TabletFile(org.apache.accumulo.core.metadata.TabletFile) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
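
Stripped of file handling, the midpoint search boils down to consuming half of a merged, sorted key stream and treating the key reached as the ~0.5 split candidate. Below is a deliberately simplified sketch under that assumption, with a sorted List<String> standing in for the merged RFile index iterator; the MidPoint class is invented for illustration.

import java.util.List;

// Hypothetical sketch: pick the key reached after consuming half the entries
// of an already-sorted key sequence, as findMidPoint does with index entries.
final class MidPoint {

    static String midPoint(List<String> sortedKeys) {
        if (sortedKeys.isEmpty())
            throw new IllegalArgumentException("no keys to split on");
        // equivalent to advancing the merged iterator numKeys / 2 times
        return sortedKeys.get(sortedKeys.size() / 2);
    }

    public static void main(String[] args) {
        // prints "c"
        System.out.println(midPoint(List.of("a", "b", "c", "d", "e")));
    }
}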

Example 20 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in the Apache Accumulo project.

From the class Gatherer, the method getFilesGroupedByLocation:

/**
 * @param fileSelector
 *          only returns files that match this predicate
 * @return A map of the form : {@code map<tserver location, map<path, list<range>>} . The ranges
 *         associated with a file represent the tablets that use the file.
 */
private Map<String, Map<TabletFile, List<TRowRange>>> getFilesGroupedByLocation(Predicate<TabletFile> fileSelector) {
    Iterable<TabletMetadata> tmi = TabletsMetadata.builder(ctx).forTable(tableId).overlapping(startRow, endRow).fetch(FILES, LOCATION, LAST, PREV_ROW).build();
    // get a subset of files
    Map<TabletFile, List<TabletMetadata>> files = new HashMap<>();
    for (TabletMetadata tm : tmi) {
        for (TabletFile file : tm.getFiles()) {
            if (fileSelector.test(file)) {
                // TODO push this filtering to server side and possibly use batch scanner
                files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
            }
        }
    }
    // group by location, then file
    Map<String, Map<TabletFile, List<TRowRange>>> locations = new HashMap<>();
    List<String> tservers = null;
    for (Entry<TabletFile, List<TabletMetadata>> entry : files.entrySet()) {
        // Prefer the minimum current location (host:port); if no tablet has a
        // current location, fall back to the minimum last known location, or
        // null when there is none.
        String location = entry.getValue().stream()
            .filter(tm -> tm.getLocation() != null)
            .map(tm -> tm.getLocation().getHostPort())
            .min(String::compareTo)
            .orElse(entry.getValue().stream()
                .filter(tm -> tm.getLast() != null)
                .map(tm -> tm.getLast().getHostPort())
                .min(String::compareTo)
                .orElse(null));
        if (location == null) {
            if (tservers == null) {
                tservers = ctx.instanceOperations().getTabletServers();
                Collections.sort(tservers);
            }
            // When no location, the approach below will consistently choose the same tserver for the
            // same file (as long as the set of tservers is stable).
            int idx = Math.abs(Hashing.murmur3_32_fixed().hashString(entry.getKey().getPathStr(), UTF_8).asInt()) % tservers.size();
            location = tservers.get(idx);
        }
        // merge contiguous ranges
        List<Range> merged = Range.mergeOverlapping(Lists.transform(entry.getValue(), tm -> tm.getExtent().toDataRange()));
        // clip the merged ranges to the queried range
        List<TRowRange> ranges =
            merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());
        locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
    }
    return locations;
}
Also used : HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Entry(java.util.Map.Entry) Collections(java.util.Collections) Predicate(java.util.function.Predicate) Collectors(java.util.stream.Collectors) Hashing(com.google.common.hash.Hashing) Lists(com.google.common.collect.Lists) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Range(org.apache.accumulo.core.data.Range) TRowRange(org.apache.accumulo.core.dataImpl.thrift.TRowRange) TabletFile(org.apache.accumulo.core.metadata.TabletFile) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) FILES(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES) LOCATION(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION) LAST(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LAST) PREV_ROW(org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW)
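
When a file has neither a current nor a last location, the snippet hashes the file path to pick a tserver deterministically. Here is a self-contained sketch of that fallback; Hashing.murmur3_32_fixed() is the same Guava call used above, while the PickServer class and pick method are invented for illustration.

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.hash.Hashing;
import java.util.List;

// Hypothetical sketch: map a file path to the same tserver every time, as
// long as the sorted tserver list is stable.
final class PickServer {

    static String pick(String filePath, List<String> sortedTservers) {
        // Note: like the original, Math.abs(Integer.MIN_VALUE) stays negative;
        // a defensive version might use Math.floorMod instead.
        int idx = Math.abs(Hashing.murmur3_32_fixed().hashString(filePath, UTF_8).asInt())
            % sortedTservers.size();
        return sortedTservers.get(idx);
    }
}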

Aggregations

TabletFile (org.apache.accumulo.core.metadata.TabletFile)36 StoredTabletFile (org.apache.accumulo.core.metadata.StoredTabletFile)20 IOException (java.io.IOException)17 Path (org.apache.hadoop.fs.Path)15 ArrayList (java.util.ArrayList)14 DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue)14 Key (org.apache.accumulo.core.data.Key)13 FileSystem (org.apache.hadoop.fs.FileSystem)13 HashMap (java.util.HashMap)12 TreeMap (java.util.TreeMap)10 Value (org.apache.accumulo.core.data.Value)8 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)8 HashSet (java.util.HashSet)6 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)6 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)6 SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator)6 Text (org.apache.hadoop.io.Text)6 Map (java.util.Map)5 PartialKey (org.apache.accumulo.core.data.PartialKey)5 List (java.util.List)4