Example 21 with StoredTabletFile

Use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

From the class CompactableImpl, method commitExternalCompaction.

@Override
public void commitExternalCompaction(ExternalCompactionId extCompactionId, long fileSize, long entries) {
    synchronized (this) {
        if (closed)
            return;
        // if a commit of this compaction is already in progress, do not start another
        if (!externalCompactionsCommitting.add(extCompactionId)) {
            return;
        }
    }
    try {
        ExternalCompactionInfo ecInfo = externalCompactions.get(extCompactionId);
        if (ecInfo != null) {
            log.debug("Attempting to commit external compaction {}", extCompactionId);
            StoredTabletFile metaFile = null;
            try {
                metaFile = tablet.getDatafileManager().bringMajorCompactionOnline(ecInfo.meta.getJobFiles(), ecInfo.meta.getCompactTmpName(), ecInfo.meta.getCompactionId(), Sets.union(ecInfo.meta.getJobFiles(), ecInfo.meta.getNextFiles()), new DataFileValue(fileSize, entries), Optional.of(extCompactionId));
                TabletLogger.compacted(getExtent(), ecInfo.job, metaFile);
            } catch (Exception e) {
                metaFile = null;
                log.error("Error committing external compaction {}", extCompactionId, e);
                throw new RuntimeException(e);
            } finally {
                completeCompaction(ecInfo.job, ecInfo.meta.getJobFiles(), metaFile);
                externalCompactions.remove(extCompactionId);
                log.debug("Completed commit of external compaction {}", extCompactionId);
            }
        } else {
            log.debug("Ignoring request to commit external compaction that is unknown {}", extCompactionId);
        }
        tablet.getContext().getAmple().deleteExternalCompactionFinalStates(List.of(extCompactionId));
    } finally {
        synchronized (this) {
            Preconditions.checkState(externalCompactionsCommitting.remove(extCompactionId));
            notifyAll();
        }
    }
}
Also used : DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) UncheckedIOException(java.io.UncheckedIOException) CompactionCanceledException(org.apache.accumulo.server.compaction.FileCompactor.CompactionCanceledException) IOException(java.io.IOException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException)
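
The idiom worth noting in this example is the in-memory guard: the compaction id is added to a synchronized set before the metadata commit starts and is removed in a finally block with a notifyAll(), so a duplicate commit request for the same id is ignored and any thread waiting for pending commits to drain is woken. Below is a minimal, self-contained sketch of that idiom; the CommitGuard class and its field names are hypothetical and are not part of the Accumulo API.

import java.util.HashSet;
import java.util.Set;

// Hypothetical sketch of the guard-set idiom used by commitExternalCompaction above:
// register the id before doing the work, always unregister and notify waiters when done.
public class CommitGuard {

    private final Set<String> committing = new HashSet<>();
    private boolean closed = false;

    public void commit(String compactionId, Runnable metadataUpdate) {
        synchronized (this) {
            // ignore the request if the guard is closed or a commit for this id is in progress
            if (closed || !committing.add(compactionId)) {
                return;
            }
        }
        try {
            // the expensive work happens outside the lock
            metadataUpdate.run();
        } finally {
            synchronized (this) {
                committing.remove(compactionId);
                // wake any thread waiting for pending commits to drain
                notifyAll();
            }
        }
    }
}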

Example 22 with StoredTabletFile

Use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

From the class CompactableUtils, method findChopFiles.

public static Set<StoredTabletFile> findChopFiles(KeyExtent extent, Map<StoredTabletFile, Pair<Key, Key>> firstAndLastKeys, Collection<StoredTabletFile> allFiles) {
    Set<StoredTabletFile> result = new HashSet<>();
    for (StoredTabletFile file : allFiles) {
        Pair<Key, Key> pair = firstAndLastKeys.get(file);
        Key first = pair.getFirst();
        Key last = pair.getSecond();
        // If the first and last keys are both null the file is empty, and if either key falls
        // outside the extent the file holds data that does not belong to this tablet; in both
        // cases select the file so it goes away.
        if ((first == null && last == null) || (first != null && !extent.contains(first.getRow())) || (last != null && !extent.contains(last.getRow()))) {
            result.add(file);
        }
    }
    return result;
}
Also used : StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)
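
The selection rule above is: keep a file if it is empty (both its first and last keys are null) or if either key falls outside the tablet's extent, since such a file carries data that does not belong to the tablet and should be chopped away. The following standalone sketch restates that predicate with plain strings standing in for Accumulo's Key and KeyExtent; every name in it is hypothetical.

// Hypothetical restatement of the chop test: a file needs chopping if it is empty
// or if its first/last row falls outside the (prevEndRow, endRow] range of the extent.
public class ChopCheck {

    static boolean needsChop(String firstRow, String lastRow, String prevEndRow, String endRow) {
        if (firstRow == null && lastRow == null) {
            return true; // empty file, select it so it goes away
        }
        return (firstRow != null && !contains(firstRow, prevEndRow, endRow))
            || (lastRow != null && !contains(lastRow, prevEndRow, endRow));
    }

    // mirrors the extent-containment check: a row is inside if prevEndRow < row <= endRow
    static boolean contains(String row, String prevEndRow, String endRow) {
        boolean afterPrev = prevEndRow == null || row.compareTo(prevEndRow) > 0;
        boolean beforeEnd = endRow == null || row.compareTo(endRow) <= 0;
        return afterPrev && beforeEnd;
    }

    public static void main(String[] args) {
        System.out.println(needsChop("c", "m", "a", "z")); // false, fully inside the extent
        System.out.println(needsChop("c", "q", "a", "m")); // true, last row past the extent end
    }
}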

Example 23 with StoredTabletFile

Use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

From the class CompactableUtils, method selectFiles.

static Set<StoredTabletFile> selectFiles(Tablet tablet, SortedMap<StoredTabletFile, DataFileValue> datafiles, PluginConfig selectorConfig) {
    CompactionSelector selector = newInstance(tablet.getTableConfiguration(), selectorConfig.getClassName(), CompactionSelector.class);
    final ServiceEnvironment senv = new ServiceEnvironmentImpl(tablet.getContext());
    selector.init(new CompactionSelector.InitParameters() {

        @Override
        public Map<String, String> getOptions() {
            return selectorConfig.getOptions();
        }

        @Override
        public PluginEnvironment getEnvironment() {
            return senv;
        }

        @Override
        public TableId getTableId() {
            return tablet.getExtent().tableId();
        }
    });
    Selection selection = selector.select(new CompactionSelector.SelectionParameters() {

        @Override
        public PluginEnvironment getEnvironment() {
            return senv;
        }

        @Override
        public Collection<CompactableFile> getAvailableFiles() {
            return Collections2.transform(datafiles.entrySet(), e -> new CompactableFileImpl(e.getKey(), e.getValue()));
        }

        @Override
        public Collection<Summary> getSummaries(Collection<CompactableFile> files, Predicate<SummarizerConfiguration> summarySelector) {
            var context = tablet.getContext();
            var tsrm = tablet.getTabletResources().getTabletServerResourceManager();
            SummaryCollection sc = new SummaryCollection();
            SummarizerFactory factory = new SummarizerFactory(tablet.getTableConfiguration());
            for (CompactableFile cf : files) {
                var file = CompactableFileImpl.toStoredTabletFile(cf);
                FileSystem fs = context.getVolumeManager().getFileSystemByPath(file.getPath());
                Configuration conf = context.getHadoopConf();
                SummaryCollection fsc = SummaryReader.load(fs, conf, factory, file.getPath(), summarySelector, tsrm.getSummaryCache(), tsrm.getIndexCache(), tsrm.getFileLenCache(), context.getCryptoService()).getSummaries(Collections.singletonList(new Gatherer.RowRange(tablet.getExtent())));
                sc.merge(fsc, factory);
            }
            return sc.getSummaries();
        }

        @Override
        public TableId getTableId() {
            return tablet.getExtent().tableId();
        }

        @Override
        public Optional<SortedKeyValueIterator<Key, Value>> getSample(CompactableFile file, SamplerConfiguration sc) {
            try {
                FileOperations fileFactory = FileOperations.getInstance();
                Path path = new Path(file.getUri());
                FileSystem ns = tablet.getTabletServer().getVolumeManager().getFileSystemByPath(path);
                var fiter = fileFactory.newReaderBuilder().forFile(path.toString(), ns, ns.getConf(), tablet.getContext().getCryptoService()).withTableConfiguration(tablet.getTableConfiguration()).seekToBeginning().build();
                return Optional.ofNullable(fiter.getSample(new SamplerConfigurationImpl(sc)));
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        }
    });
    return selection.getFilesToCompact().stream().map(CompactableFileImpl::toStoredTabletFile).collect(Collectors.toSet());
}
Also used : PluginEnvironment(org.apache.accumulo.core.client.PluginEnvironment) TableId(org.apache.accumulo.core.data.TableId) FileCompactor(org.apache.accumulo.server.compaction.FileCompactor) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) Collections2(com.google.common.collect.Collections2) MetadataTableUtil(org.apache.accumulo.server.util.MetadataTableUtil) Selection(org.apache.accumulo.core.client.admin.compaction.CompactionSelector.Selection) PluginConfig(org.apache.accumulo.core.client.admin.PluginConfig) CompactableFile(org.apache.accumulo.core.client.admin.compaction.CompactableFile) BlockCache(org.apache.accumulo.core.spi.cache.BlockCache) ConfigurationTypeHelper(org.apache.accumulo.core.conf.ConfigurationTypeHelper) ClassLoaderUtil(org.apache.accumulo.core.classloader.ClassLoaderUtil) FileOperations(org.apache.accumulo.core.file.FileOperations) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) CompactionStrategyConfigUtil(org.apache.accumulo.core.clientImpl.CompactionStrategyConfigUtil) Value(org.apache.accumulo.core.data.Value) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) Property(org.apache.accumulo.core.conf.Property) CompactionEnv(org.apache.accumulo.server.compaction.FileCompactor.CompactionEnv) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) Predicate(java.util.function.Predicate) Collection(java.util.Collection) MajorCompactionReason(org.apache.accumulo.tserver.compaction.MajorCompactionReason) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Set(java.util.Set) SummaryReader(org.apache.accumulo.core.summary.SummaryReader) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) CompactionStrategyConfig(org.apache.accumulo.core.client.admin.CompactionStrategyConfig) TableConfiguration(org.apache.accumulo.server.conf.TableConfiguration) CompactionSelector(org.apache.accumulo.core.client.admin.compaction.CompactionSelector) Pair(org.apache.accumulo.core.util.Pair) ServiceEnvironment(org.apache.accumulo.core.spi.common.ServiceEnvironment) Optional(java.util.Optional) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) CacheBuilder(com.google.common.cache.CacheBuilder) SortedMap(java.util.SortedMap) SummarizerFactory(org.apache.accumulo.core.summary.SummarizerFactory) CompactionPlan(org.apache.accumulo.tserver.compaction.CompactionPlan) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) CompactionHelper(org.apache.accumulo.tserver.tablet.CompactableImpl.CompactionHelper) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) SummaryCollection(org.apache.accumulo.core.summary.SummaryCollection) HashMap(java.util.HashMap) CompactionConfig(org.apache.accumulo.core.client.admin.CompactionConfig) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) HashSet(java.util.HashSet) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) Gatherer(org.apache.accumulo.core.summary.Gatherer) CompactionKind(org.apache.accumulo.core.spi.compaction.CompactionKind) Key(org.apache.accumulo.core.data.Key) ServiceEnvironmentImpl(org.apache.accumulo.server.ServiceEnvironmentImpl) TabletFile(org.apache.accumulo.core.metadata.TabletFile) CompactionStats(org.apache.accumulo.server.compaction.CompactionStats) Summary(org.apache.accumulo.core.client.summary.Summary) CompactionCanceledException(org.apache.accumulo.server.compaction.FileCompactor.CompactionCanceledException) Logger(org.slf4j.Logger) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) MajorCompactionRequest(org.apache.accumulo.tserver.compaction.MajorCompactionRequest) IOException(java.io.IOException) CompactionConfigurer(org.apache.accumulo.core.client.admin.compaction.CompactionConfigurer) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) WriteParameters(org.apache.accumulo.tserver.compaction.WriteParameters) ExecutionException(java.util.concurrent.ExecutionException) CompactionJob(org.apache.accumulo.core.spi.compaction.CompactionJob) Preconditions(com.google.common.base.Preconditions) CompactionStrategy(org.apache.accumulo.tserver.compaction.CompactionStrategy) Cache(com.google.common.cache.Cache) Collections(java.util.Collections) UserCompactionUtils(org.apache.accumulo.core.clientImpl.UserCompactionUtils) CompactableFileImpl(org.apache.accumulo.core.metadata.CompactableFileImpl)
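
The anonymous InitParameters and SelectionParameters instances above are the server-side half of the CompactionSelector SPI; a user-supplied selector only has to implement init and select. The sketch below shows what such a selector might look like, choosing every file above a size threshold. The class name and the "sizeThreshold" option are invented for illustration, and it assumes CompactableFile exposes an estimated size accessor; treat it as a sketch rather than a tested implementation.

import java.util.stream.Collectors;
import org.apache.accumulo.core.client.admin.compaction.CompactionSelector;

// Hypothetical selector driven by CompactableUtils.selectFiles above: keep every
// candidate file whose estimated size exceeds a configured threshold.
public class SizeThresholdSelector implements CompactionSelector {

    private long threshold;

    @Override
    public void init(InitParameters iparams) {
        // "sizeThreshold" is an invented option name; the options map comes from the
        // PluginConfig that selectFiles passes into init()
        threshold = Long.parseLong(iparams.getOptions().getOrDefault("sizeThreshold", "0"));
    }

    @Override
    public Selection select(SelectionParameters sparams) {
        var selected = sparams.getAvailableFiles().stream()
            .filter(cf -> cf.getEstimatedSize() > threshold)
            .collect(Collectors.toList());
        return new Selection(selected);
    }
}

If a selector like this were deployed, it would be referenced by class name in the table's compaction selector configuration, which is the PluginConfig that selectFiles receives.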

Example 24 with StoredTabletFile

Use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

From the class CompactableUtils, method getFirstAndLastKeys.

public static Map<StoredTabletFile, Pair<Key, Key>> getFirstAndLastKeys(Tablet tablet, Set<StoredTabletFile> allFiles) throws IOException {
    final Map<StoredTabletFile, Pair<Key, Key>> result = new HashMap<>();
    final FileOperations fileFactory = FileOperations.getInstance();
    final VolumeManager fs = tablet.getTabletServer().getVolumeManager();
    for (StoredTabletFile file : allFiles) {
        FileSystem ns = fs.getFileSystemByPath(file.getPath());
        try (FileSKVIterator openReader = fileFactory.newReaderBuilder().forFile(file.getPathStr(), ns, ns.getConf(), tablet.getContext().getCryptoService()).withTableConfiguration(tablet.getTableConfiguration()).seekToBeginning().build()) {
            Key first = openReader.getFirstKey();
            Key last = openReader.getLastKey();
            result.put(file, new Pair<>(first, last));
        }
    }
    return result;
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) HashMap(java.util.HashMap) FileSystem(org.apache.hadoop.fs.FileSystem) FileOperations(org.apache.accumulo.core.file.FileOperations) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) Key(org.apache.accumulo.core.data.Key) Pair(org.apache.accumulo.core.util.Pair)
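
This method supplies the per-file key bounds that findChopFiles from Example 22 consumes. A short sketch of how the two helpers might be composed follows; the ChopPlanner wrapper is hypothetical, while Tablet and CompactableUtils are the tserver-internal classes shown in these examples (their imports are omitted here).

import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.metadata.StoredTabletFile;
import org.apache.accumulo.core.util.Pair;

// Hypothetical wrapper composing the helpers from Examples 22 and 24: read each
// file's first and last key, then keep only the files that need to be chopped.
class ChopPlanner {

    static Set<StoredTabletFile> chopCandidates(Tablet tablet, Set<StoredTabletFile> allFiles)
            throws IOException {
        Map<StoredTabletFile, Pair<Key, Key>> bounds =
            CompactableUtils.getFirstAndLastKeys(tablet, allFiles);
        return CompactableUtils.findChopFiles(tablet.getExtent(), bounds, allFiles);
    }
}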

Example 25 with StoredTabletFile

Use of org.apache.accumulo.core.metadata.StoredTabletFile in project accumulo by apache.

From the class DatafileManager, method bringMinorCompactionOnline.

/**
 * Returns an Optional of the new file created. It is possible that the file was just flushed with
 * no entries and so was never inserted into the metadata table; in that case an empty Optional is
 * returned. If the file was stored in the metadata table, then the StoredTabletFile is returned.
 */
Optional<StoredTabletFile> bringMinorCompactionOnline(TabletFile tmpDatafile, TabletFile newDatafile, DataFileValue dfv, CommitSession commitSession, long flushId) {
    Optional<StoredTabletFile> newFile;
    // rename before putting in metadata table, so files in metadata table should
    // always exist
    boolean attemptedRename = false;
    VolumeManager vm = tablet.getTabletServer().getContext().getVolumeManager();
    do {
        try {
            if (dfv.getNumEntries() == 0) {
                log.debug("No data entries so delete temporary file {}", tmpDatafile);
                vm.deleteRecursively(tmpDatafile.getPath());
            } else {
                if (!attemptedRename && vm.exists(newDatafile.getPath())) {
                    log.warn("Target map file already exist {}", newDatafile);
                    throw new RuntimeException("File unexpectedly exists " + newDatafile.getPath());
                }
                // the following checks for spurious rename failures that succeeded but gave an IoE
                if (attemptedRename && vm.exists(newDatafile.getPath()) && !vm.exists(tmpDatafile.getPath())) {
                    // seems like previous rename succeeded, so break
                    break;
                }
                attemptedRename = true;
                rename(vm, tmpDatafile.getPath(), newDatafile.getPath());
            }
            break;
        } catch (IOException ioe) {
            log.warn("Tablet " + tablet.getExtent() + " failed to rename " + newDatafile + " after MinC, will retry in 60 secs...", ioe);
            sleepUninterruptibly(1, TimeUnit.MINUTES);
        }
    } while (true);
    long t1, t2;
    Set<String> unusedWalLogs = tablet.beginClearingUnusedLogs();
    @SuppressWarnings("deprecation") boolean replicate = org.apache.accumulo.core.replication.ReplicationConfigurationUtil.isEnabled(tablet.getExtent(), tablet.getTableConfiguration());
    Set<String> logFileOnly = null;
    if (replicate) {
        // unusedWalLogs is of the form host/fileURI, need to strip off the host portion
        logFileOnly = new HashSet<>();
        for (String unusedWalLog : unusedWalLogs) {
            int index = unusedWalLog.indexOf('/');
            if (index == -1) {
                log.warn("Could not find host component to strip from DFSLogger representation of WAL");
            } else {
                unusedWalLog = unusedWalLog.substring(index + 1);
            }
            logFileOnly.add(unusedWalLog);
        }
    }
    try {
        // The order of writing to the metadata table and the walog is important in the face of
        // machine/process failures: metadata must be written before the walog, because when things
        // are done in the reverse order data could be lost. Also, the minor compaction start event
        // should be written before the following metadata write is made.
        newFile = tablet.updateTabletDataFile(commitSession.getMaxCommittedTime(), newDatafile, dfv, unusedWalLogs, flushId);
        // record that these WALs still contain data to replicate; they are still referenced by
        // this tablet, so their replication status cannot be 'closed'.
        if (replicate) {
            if (log.isDebugEnabled()) {
                log.debug("Recording that data has been ingested into {} using {}", tablet.getExtent(), logFileOnly);
            }
            for (String logFile : logFileOnly) {
                @SuppressWarnings("deprecation") Status status = org.apache.accumulo.server.replication.StatusUtil.openWithUnknownLength();
                ReplicationTableUtil.updateFiles(tablet.getContext(), tablet.getExtent(), logFile, status);
            }
        }
    } finally {
        tablet.finishClearingUnusedLogs();
    }
    do {
        try {
            // the purpose of making this update use the new commit session, instead of the old one
            // passed in, is because the new one will reference the logs used by current memory...
            tablet.getTabletServer().minorCompactionFinished(tablet.getTabletMemory().getCommitSession(), commitSession.getWALogSeq() + 2);
            break;
        } catch (IOException e) {
            log.error("Failed to write to write-ahead log " + e.getMessage() + " will retry", e);
            sleepUninterruptibly(1, TimeUnit.SECONDS);
        }
    } while (true);
    synchronized (tablet) {
        t1 = System.currentTimeMillis();
        if (dfv.getNumEntries() > 0 && newFile.isPresent()) {
            StoredTabletFile newFileStored = newFile.get();
            if (datafileSizes.containsKey(newFileStored)) {
                log.error("Adding file that is already in set {}", newFileStored);
            }
            datafileSizes.put(newFileStored, dfv);
            updateCount++;
        }
        tablet.flushComplete(flushId);
        t2 = System.currentTimeMillis();
    }
    TabletLogger.flushed(tablet.getExtent(), newFile);
    if (log.isTraceEnabled()) {
        log.trace(String.format("MinC finish lock %.2f secs %s", (t2 - t1) / 1000.0, tablet.getExtent().toString()));
    }
    long splitSize = tablet.getTableConfiguration().getAsBytes(Property.TABLE_SPLIT_THRESHOLD);
    if (dfv.getSize() > splitSize) {
        log.debug(String.format("Minor Compaction wrote out file larger than split threshold." + " split threshold = %,d  file size = %,d", splitSize, dfv.getSize()));
    }
    return newFile;
}
Also used : Status(org.apache.accumulo.server.replication.proto.Replication.Status) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) IOException(java.io.IOException) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile)
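
As the javadoc above notes, the returned Optional is empty when the flush produced no entries and nothing was inserted into the metadata table. A caller therefore has to branch on the result before tracking the new file; the snippet below is an illustrative sketch, and the datafileManager and log variables are placeholders rather than the actual Tablet code.

// Hypothetical caller of bringMinorCompactionOnline: only act on the new file when
// the flush actually produced data that was recorded in the metadata table.
Optional<StoredTabletFile> newFile = datafileManager.bringMinorCompactionOnline(
    tmpDatafile, newDatafile, dfv, commitSession, flushId);

if (newFile.isPresent()) {
    log.debug("Minor compaction added {} to the tablet's file set", newFile.get());
} else {
    log.debug("Minor compaction produced no entries; nothing was added to the metadata table");
}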

Aggregations

StoredTabletFile (org.apache.accumulo.core.metadata.StoredTabletFile): 47 usages
DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue): 25 usages
TabletFile (org.apache.accumulo.core.metadata.TabletFile): 18 usages
IOException (java.io.IOException): 12 usages
KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent): 11 usages
HashMap (java.util.HashMap): 9 usages
HashSet (java.util.HashSet): 9 usages
Key (org.apache.accumulo.core.data.Key): 9 usages
ArrayList (java.util.ArrayList): 8 usages
TreeMap (java.util.TreeMap): 8 usages
Value (org.apache.accumulo.core.data.Value): 8 usages
Path (org.apache.hadoop.fs.Path): 7 usages
Text (org.apache.hadoop.io.Text): 7 usages
Pair (org.apache.accumulo.core.util.Pair): 6 usages
MajorCompactionRequest (org.apache.accumulo.tserver.compaction.MajorCompactionRequest): 6 usages
Test (org.junit.Test): 6 usages
LogEntry (org.apache.accumulo.core.tabletserver.log.LogEntry): 5 usages
UncheckedIOException (java.io.UncheckedIOException): 4 usages
CompactionConfig (org.apache.accumulo.core.client.admin.CompactionConfig): 4 usages
TServerInstance (org.apache.accumulo.core.metadata.TServerInstance): 4 usages