
Example 26 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

From class DatafileManager, method bringMajorCompactionOnline.

StoredTabletFile bringMajorCompactionOnline(Set<StoredTabletFile> oldDatafiles, TabletFile tmpDatafile, Long compactionId, Set<StoredTabletFile> selectedFiles, DataFileValue dfv, Optional<ExternalCompactionId> ecid) throws IOException {
    final KeyExtent extent = tablet.getExtent();
    VolumeManager vm = tablet.getTabletServer().getContext().getVolumeManager();
    long t1, t2;
    TabletFile newDatafile = CompactableUtils.computeCompactionFileDest(tmpDatafile);
    if (vm.exists(newDatafile.getPath())) {
        log.error("Target map file already exist " + newDatafile, new Exception());
        throw new IllegalStateException("Target map file already exist " + newDatafile);
    }
    if (dfv.getNumEntries() == 0) {
        vm.deleteRecursively(tmpDatafile.getPath());
    } else {
        // rename before putting in metadata table, so files in metadata table should
        // always exist
        rename(vm, tmpDatafile.getPath(), newDatafile.getPath());
    }
    TServerInstance lastLocation = null;
    // calling insert to get the new file before inserting into the metadata
    StoredTabletFile newFile = newDatafile.insert();
    Long compactionIdToWrite = null;
    synchronized (tablet) {
        t1 = System.currentTimeMillis();
        Preconditions.checkState(datafileSizes.keySet().containsAll(oldDatafiles), "Compacted files %s are not a subset of tablet files %s", oldDatafiles, datafileSizes.keySet());
        if (dfv.getNumEntries() > 0) {
            Preconditions.checkState(!datafileSizes.containsKey(newFile), "New compaction file %s already exists in tablet files %s", newFile, datafileSizes.keySet());
        }
        tablet.incrementDataSourceDeletions();
        datafileSizes.keySet().removeAll(oldDatafiles);
        if (dfv.getNumEntries() > 0) {
            // the new file could be used by a follow-on pass in a multi-pass compaction
            datafileSizes.put(newFile, dfv);
        }
        updateCount++;
        tablet.computeNumEntries();
        lastLocation = tablet.resetLastLocation();
        if (compactionId != null && Collections.disjoint(selectedFiles, datafileSizes.keySet())) {
            compactionIdToWrite = compactionId;
        }
        t2 = System.currentTimeMillis();
    }
    // known consistency issue between minor and major compactions - see ACCUMULO-18
    Set<StoredTabletFile> filesInUseByScans = waitForScansToFinish(oldDatafiles);
    if (!filesInUseByScans.isEmpty())
        log.debug("Adding scan refs to metadata {} {}", extent, filesInUseByScans);
    ManagerMetadataUtil.replaceDatafiles(tablet.getContext(), extent, oldDatafiles, filesInUseByScans, newFile, compactionIdToWrite, dfv, tablet.getTabletServer().getClientAddressString(), lastLocation, tablet.getTabletServer().getLock(), ecid);
    tablet.setLastCompactionID(compactionIdToWrite);
    removeFilesAfterScan(filesInUseByScans);
    if (log.isTraceEnabled()) {
        log.trace(String.format("MajC finish lock %.2f secs", (t2 - t1) / 1000.0));
    }
    return newFile;
}
Also used: VolumeManager(org.apache.accumulo.server.fs.VolumeManager) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) IOException(java.io.IOException) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance)
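
The comment in the method body states the crash-safety invariant this method relies on: the compacted file is renamed into place before the metadata table is updated, so the metadata table never references a file that does not exist. Below is a minimal sketch of that ordering, assuming VolumeManager exposes a boolean rename; commitCompactedFile and recordInMetadata are hypothetical names, not Accumulo API.

void commitCompactedFile(VolumeManager vm, TabletFile tmpFile, TabletFile newDatafile) throws IOException {
    // Step 1: move the finished file into its final location first. A crash after
    // this point leaves only an orphaned data file, which can be cleaned up later.
    if (!vm.rename(tmpFile.getPath(), newDatafile.getPath())) {
        throw new IOException("rename failed: " + tmpFile + " -> " + newDatafile);
    }
    // Step 2: only now reference the file from the metadata table. Because the
    // rename happened first, every file named in metadata is guaranteed to exist.
    recordInMetadata(newDatafile); // hypothetical stand-in for the metadata update
}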

Example 27 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

From class DatafileManager, method reserveFilesForScan.

Pair<Long, Map<TabletFile, DataFileValue>> reserveFilesForScan() {
    synchronized (tablet) {
        Set<StoredTabletFile> absFilePaths = new HashSet<>(datafileSizes.keySet());
        long rid = nextScanReservationId++;
        scanFileReservations.put(rid, absFilePaths);
        Map<TabletFile, DataFileValue> ret = new HashMap<>();
        for (StoredTabletFile path : absFilePaths) {
            fileScanReferenceCounts.increment(path, 1);
            ret.put(path, datafileSizes.get(path));
        }
        return new Pair<>(rid, ret);
    }
}
Also used: DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) HashMap(java.util.HashMap) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile) HashSet(java.util.HashSet) Pair(org.apache.accumulo.core.util.Pair)
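
Each reservation increments per-file reference counts, so a caller is expected to release the reservation id when its scan completes. A hedged usage sketch follows; the matching release call is assumed to be returnFilesForScan, and scanFile is a hypothetical placeholder.

Pair<Long, Map<TabletFile, DataFileValue>> reservation = datafileManager.reserveFilesForScan();
try {
    for (Entry<TabletFile, DataFileValue> entry : reservation.getSecond().entrySet()) {
        scanFile(entry.getKey(), entry.getValue()); // hypothetical per-file scan
    }
} finally {
    // Release the reservation so the reference counts taken above are decremented
    // and the files can be removed once no scan is using them.
    datafileManager.returnFilesForScan(reservation.getFirst());
}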

Example 28 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

From class DatafileManager, method importMapFiles.

public Collection<StoredTabletFile> importMapFiles(long tid, Map<TabletFile, DataFileValue> paths, boolean setTime) throws IOException {
    String bulkDir = null;
    // once tablet files are inserted into the metadata they will become StoredTabletFiles
    Map<StoredTabletFile, DataFileValue> newFiles = new HashMap<>(paths.size());
    for (TabletFile tpath : paths.keySet()) {
        boolean inTheRightDirectory = false;
        Path parent = tpath.getPath().getParent().getParent();
        for (String tablesDir : tablet.getContext().getTablesDirs()) {
            if (parent.equals(new Path(tablesDir, tablet.getExtent().tableId().canonical()))) {
                inTheRightDirectory = true;
                break;
            }
        }
        if (!inTheRightDirectory) {
            throw new IOException("Data file " + tpath + " not in table dirs");
        }
        if (bulkDir == null)
            bulkDir = tpath.getTabletDir();
        else if (!bulkDir.equals(tpath.getTabletDir()))
            throw new IllegalArgumentException("bulk files in different dirs " + bulkDir + " " + tpath);
    }
    if (tablet.getExtent().isMeta()) {
        throw new IllegalArgumentException("Can not import files to a metadata tablet");
    }
    synchronized (bulkFileImportLock) {
        if (!paths.isEmpty()) {
            long bulkTime = Long.MIN_VALUE;
            if (setTime) {
                for (DataFileValue dfv : paths.values()) {
                    long nextTime = tablet.getAndUpdateTime();
                    if (nextTime < bulkTime)
                        throw new IllegalStateException("Time went backwards unexpectedly " + nextTime + " " + bulkTime);
                    bulkTime = nextTime;
                    dfv.setTime(bulkTime);
                }
            }
            newFiles = tablet.updatePersistedTime(bulkTime, paths, tid);
        }
    }
    synchronized (tablet) {
        for (Entry<StoredTabletFile, DataFileValue> tpath : newFiles.entrySet()) {
            if (datafileSizes.containsKey(tpath.getKey())) {
                log.error("Adding file that is already in set {}", tpath.getKey());
            }
            datafileSizes.put(tpath.getKey(), tpath.getValue());
        }
        updateCount++;
        tablet.getTabletResources().importedMapFiles();
        tablet.computeNumEntries();
    }
    for (Entry<StoredTabletFile, DataFileValue> entry : newFiles.entrySet()) {
        TabletLogger.bulkImported(tablet.getExtent(), entry.getKey());
    }
    return newFiles.keySet();
}
Also used: Path(org.apache.hadoop.fs.Path) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) HashMap(java.util.HashMap) IOException(java.io.IOException) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile)
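
The directory validation above encodes the expected bulk-file layout <tablesDir>/<tableId>/<bulkDir>/<fileName>: the grandparent of each file path must be the table directory. A standalone sketch of that predicate, with a hypothetical name and signature:

// Hypothetical standalone form of the layout check in importMapFiles.
static boolean inTableDirs(Path file, Collection<String> tablesDirs, String tableId) {
    // <tablesDir>/<tableId>/<bulkDir>/<fileName> -> the grandparent is <tablesDir>/<tableId>
    Path grandParent = file.getParent().getParent();
    for (String tablesDir : tablesDirs) {
        if (grandParent.equals(new Path(tablesDir, tableId))) {
            return true;
        }
    }
    return false;
}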

Example 29 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

From class CompactableUtilsTest, method testEquivalence.

@Test
public void testEquivalence() {
    TabletFile expected = new TabletFile(new Path("hdfs://localhost:8020/accumulo/tables/2a/default_tablet/F0000070.rf"));
    TabletFile tmpFile = new TabletFile(new Path(expected.getMetaInsert() + "_tmp"));
    TabletFile dest = CompactableUtils.computeCompactionFileDest(tmpFile);
    assertEquals(expected, dest);
}
Also used: Path(org.apache.hadoop.fs.Path) TabletFile(org.apache.accumulo.core.metadata.TabletFile) Test(org.junit.Test)
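
The test pins down the contract of computeCompactionFileDest: given a path ending in _tmp, it returns the same path with that suffix removed. A plausible implementation consistent with this contract follows; it is a sketch, not the actual Accumulo source.

// Sketch consistent with testEquivalence: strip the "_tmp" suffix used while the
// compaction was writing, yielding the final destination file.
static TabletFile computeCompactionFileDest(TabletFile tmpFile) {
    String path = tmpFile.getPath().toString();
    if (!path.endsWith("_tmp")) {
        throw new IllegalArgumentException("Expected temp compaction file ending in _tmp: " + path);
    }
    return new TabletFile(new Path(path.substring(0, path.length() - "_tmp".length())));
}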

Example 30 with TabletFile

Use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

From class MajorCompactionRequest, method getSummaries.

/**
 * Returns all summaries present in each file.
 *
 * <p>
 * This method can only be called from
 * {@link CompactionStrategy#gatherInformation(MajorCompactionRequest)}. Unfortunately,
 * {@code gatherInformation()} is not called before
 * {@link CompactionStrategy#shouldCompact(MajorCompactionRequest)}. Therefore
 * {@code shouldCompact()} should just return true when a compaction strategy wants to use
 * summary information.
 *
 * <p>
 * When using summaries to make compaction decisions, it's important to ensure that all summary
 * data fits in the tablet server summary cache. The size of this cache is configured by
 * {@code tserver.cache.summary.size}. Also it's important to use the summarySelector predicate to only
 * retrieve the needed summary data. Otherwise unneeded summary data could be brought into the
 * cache.
 *
 * <p>
 * Some files may contain data outside of a tablet's range. When {@link Summarizer}s generate
 * small amounts of summary data, multiple summaries may be stored within a file for different
 * row ranges. This allows more accurate summaries to be returned when a file has data outside
 * a tablet's range. However, some summary data outside of the tablet's range may still be
 * included. When this happens, {@link FileStatistics#getExtra()} will be non-zero. Also, it's
 * good to be aware of the other potential causes of inaccuracies; see
 * {@link FileStatistics#getInaccurate()}.
 *
 * <p>
 * When this method is called with multiple files, it will automatically merge summary data using
 * {@link Combiner#merge(Map, Map)}. If summary information is needed for each file, then just
 * call this method for each file.
 *
 * <p>
 * Writing a compaction strategy that uses summary information is a bit tricky. See the source
 * code of {@link TooManyDeletesCompactionStrategy} for an example of such a strategy.
 *
 * @see Summarizer
 * @see TableOperations#addSummarizers(String, SummarizerConfiguration...)
 * @see WriterOptions#withSummarizers(SummarizerConfiguration...)
 */
public List<Summary> getSummaries(Collection<StoredTabletFile> files, Predicate<SummarizerConfiguration> summarySelector) {
    Objects.requireNonNull(volumeManager, "Getting summaries is not supported at this time. It's only supported when " + "CompactionStrategy.gatherInformation() is called.");
    SummaryCollection sc = new SummaryCollection();
    SummarizerFactory factory = new SummarizerFactory(tableConfig);
    for (TabletFile file : files) {
        FileSystem fs = volumeManager.getFileSystemByPath(file.getPath());
        Configuration conf = context.getHadoopConf();
        SummaryCollection fsc = SummaryReader.load(fs, conf, factory, file.getPath(), summarySelector, summaryCache, indexCache, fileLenCache, context.getCryptoService()).getSummaries(Collections.singletonList(new Gatherer.RowRange(extent)));
        sc.merge(fsc, factory);
    }
    return sc.getSummaries();
}
Also used: SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) SummarizerFactory(org.apache.accumulo.core.summary.SummarizerFactory) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile) SummaryCollection(org.apache.accumulo.core.summary.SummaryCollection)
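
The javadoc's advice about the summarySelector predicate is worth a concrete illustration: pass a predicate matching only the summarizer you need, so unrelated summary data never enters the tserver summary cache. A hedged usage sketch from inside gatherInformation(); DeletesSummarizer is a real summarizer, but the request and files variables are illustrative.

// Usage sketch: select only the deletes summarizer so no other summary data is
// pulled into the summary cache.
Predicate<SummarizerConfiguration> onlyDeletes = conf -> conf.getClassName()
    .equals("org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer");
List<Summary> summaries = request.getSummaries(files, onlyDeletes);
for (Summary summary : summaries) {
    // Per the javadoc above, FileStatistics.getExtra()/getInaccurate() flag summary
    // data that may not line up exactly with the tablet's range.
    log.debug("deletes summary: {} (extra={})", summary.getStatistics(), summary.getFileStatistics().getExtra());
}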

Aggregations

TabletFile (org.apache.accumulo.core.metadata.TabletFile) 36
StoredTabletFile (org.apache.accumulo.core.metadata.StoredTabletFile) 20
IOException (java.io.IOException) 17
Path (org.apache.hadoop.fs.Path) 15
ArrayList (java.util.ArrayList) 14
DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue) 14
Key (org.apache.accumulo.core.data.Key) 13
FileSystem (org.apache.hadoop.fs.FileSystem) 13
HashMap (java.util.HashMap) 12
TreeMap (java.util.TreeMap) 10
Value (org.apache.accumulo.core.data.Value) 8
KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent) 8
HashSet (java.util.HashSet) 6
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration) 6
FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator) 6
SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator) 6
Text (org.apache.hadoop.io.Text) 6
Map (java.util.Map) 5
PartialKey (org.apache.accumulo.core.data.PartialKey) 5
List (java.util.List) 4