Example 31 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method returnFilesForScan.

void returnFilesForScan(Long reservationId) {
    final Set<FileRef> filesToDelete = new HashSet<>();
    synchronized (tablet) {
        Set<FileRef> absFilePaths = scanFileReservations.remove(reservationId);
        if (absFilePaths == null)
            throw new IllegalArgumentException("Unknown scan reservation id " + reservationId);
        boolean notify = false;
        for (FileRef path : absFilePaths) {
            long refCount = fileScanReferenceCounts.decrement(path, 1);
            if (refCount == 0) {
                if (filesToDeleteAfterScan.remove(path))
                    filesToDelete.add(path);
                notify = true;
            } else if (refCount < 0)
                throw new IllegalStateException("Scan ref count for " + path + " is " + refCount);
        }
        if (notify)
            tablet.notifyAll();
    }
    if (filesToDelete.size() > 0) {
        log.debug("Removing scan refs from metadata {} {}", tablet.getExtent(), filesToDelete);
        MetadataTableUtil.removeScanFiles(tablet.getExtent(), filesToDelete, tablet.getTabletServer(), tablet.getTabletServer().getLock());
    }
}
Also used : FileRef(org.apache.accumulo.server.fs.FileRef) HashSet(java.util.HashSet)

Example 32 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method bringMajorCompactionOnline.

void bringMajorCompactionOnline(Set<FileRef> oldDatafiles, FileRef tmpDatafile, FileRef newDatafile, Long compactionId, DataFileValue dfv) throws IOException {
    final KeyExtent extent = tablet.getExtent();
    long t1, t2;
    if (!extent.isRootTablet()) {
        if (tablet.getTabletServer().getFileSystem().exists(newDatafile.path())) {
            log.error("Target map file already exist " + newDatafile, new Exception());
            throw new IllegalStateException("Target map file already exist " + newDatafile);
        }
        // rename before putting in metadata table, so files in metadata table should
        // always exist
        rename(tablet.getTabletServer().getFileSystem(), tmpDatafile.path(), newDatafile.path());
        if (dfv.getNumEntries() == 0) {
            tablet.getTabletServer().getFileSystem().deleteRecursively(newDatafile.path());
        }
    }
    TServerInstance lastLocation = null;
    synchronized (tablet) {
        t1 = System.currentTimeMillis();
        IZooReaderWriter zoo = ZooReaderWriter.getInstance();
        tablet.incrementDataSourceDeletions();
        if (extent.isRootTablet()) {
            waitForScansToFinish(oldDatafiles, true, Long.MAX_VALUE);
            try {
                if (!zoo.isLockHeld(tablet.getTabletServer().getLock().getLockID())) {
                    throw new IllegalStateException();
                }
            } catch (Exception e) {
                throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
            }
            // mark files as ready for deletion, but
            // do not delete them until we successfully
            // rename the compacted map file, in case
            // the system goes down
            RootFiles.replaceFiles(tablet.getTableConfiguration(), tablet.getTabletServer().getFileSystem(), tablet.getLocation(), oldDatafiles, tmpDatafile, newDatafile);
        }
        // atomically remove old files and add new file
        for (FileRef oldDatafile : oldDatafiles) {
            if (!datafileSizes.containsKey(oldDatafile)) {
                log.error("file does not exist in set {}", oldDatafile);
            }
            datafileSizes.remove(oldDatafile);
            majorCompactingFiles.remove(oldDatafile);
        }
        if (datafileSizes.containsKey(newDatafile)) {
            log.error("Adding file that is already in set {}", newDatafile);
        }
        if (dfv.getNumEntries() > 0) {
            datafileSizes.put(newDatafile, dfv);
        }
        // could be used by a follow on compaction in a multipass compaction
        majorCompactingFiles.add(newDatafile);
        tablet.computeNumEntries();
        lastLocation = tablet.resetLastLocation();
        tablet.setLastCompactionID(compactionId);
        t2 = System.currentTimeMillis();
    }
    if (!extent.isRootTablet()) {
        Set<FileRef> filesInUseByScans = waitForScansToFinish(oldDatafiles, false, 10000);
        if (filesInUseByScans.size() > 0)
            log.debug("Adding scan refs to metadata {} {}", extent, filesInUseByScans);
        MasterMetadataUtil.replaceDatafiles(tablet.getTabletServer(), extent, oldDatafiles, filesInUseByScans, newDatafile, compactionId, dfv, tablet.getTabletServer().getClientAddressString(), lastLocation, tablet.getTabletServer().getLock());
        removeFilesAfterScan(filesInUseByScans);
    }
    log.debug(String.format("MajC finish lock %.2f secs", (t2 - t1) / 1000.0));
    log.debug("TABLET_HIST {} MajC  --> {}", oldDatafiles, newDatafile);
}
Also used : FileRef(org.apache.accumulo.server.fs.FileRef) IZooReaderWriter(org.apache.accumulo.fate.zookeeper.IZooReaderWriter) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) IOException(java.io.IOException) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance)
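The tmp-file convention is the key detail here: the compactor writes its output to a *_tmp file, and this method renames it into place before the metadata table is updated, so the metadata never points at a file that does not exist. A hypothetical call site, with the paths, sizes, and compaction id invented for illustration (assuming FileRef's single-string constructor and DataFileValue(size, numEntries)):

// Hypothetical invocation; the real caller is the tablet's major compaction code.
Set<FileRef> oldDatafiles = new HashSet<>(Arrays.asList(
        new FileRef("hdfs://nn/accumulo/tables/2/t-0001/F0000a.rf"),
        new FileRef("hdfs://nn/accumulo/tables/2/t-0001/F0000b.rf")));
FileRef tmpDatafile = new FileRef("hdfs://nn/accumulo/tables/2/t-0001/A0000c.rf_tmp");
FileRef newDatafile = new FileRef("hdfs://nn/accumulo/tables/2/t-0001/A0000c.rf");
// Bytes and entry count produced by the compaction (assumed constructor arguments).
DataFileValue dfv = new DataFileValue(1_000_000L, 50_000L);
datafileManager.bringMajorCompactionOnline(oldDatafiles, tmpDatafile, newDatafile, compactionId, dfv);

If dfv.getNumEntries() is zero, the method deletes the renamed file and never adds it to datafileSizes, as the snippet above shows.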

Example 33 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method importMapFiles.

public void importMapFiles(long tid, Map<FileRef, DataFileValue> pathsString, boolean setTime) throws IOException {
    String bulkDir = null;
    Map<FileRef, DataFileValue> paths = new HashMap<>();
    for (Entry<FileRef, DataFileValue> entry : pathsString.entrySet()) paths.put(entry.getKey(), entry.getValue());
    for (FileRef tpath : paths.keySet()) {
        boolean inTheRightDirectory = false;
        Path parent = tpath.path().getParent().getParent();
        for (String tablesDir : ServerConstants.getTablesDirs()) {
            if (parent.equals(new Path(tablesDir, tablet.getExtent().getTableId().canonicalID()))) {
                inTheRightDirectory = true;
                break;
            }
        }
        if (!inTheRightDirectory) {
            throw new IOException("Data file " + tpath + " not in table dirs");
        }
        if (bulkDir == null)
            bulkDir = tpath.path().getParent().toString();
        else if (!bulkDir.equals(tpath.path().getParent().toString()))
            throw new IllegalArgumentException("bulk files in different dirs " + bulkDir + " " + tpath);
    }
    if (tablet.getExtent().isMeta()) {
        throw new IllegalArgumentException("Can not import files to a metadata tablet");
    }
    synchronized (bulkFileImportLock) {
        if (paths.size() > 0) {
            long bulkTime = Long.MIN_VALUE;
            if (setTime) {
                for (DataFileValue dfv : paths.values()) {
                    long nextTime = tablet.getAndUpdateTime();
                    if (nextTime < bulkTime)
                        throw new IllegalStateException("Time went backwards unexpectedly " + nextTime + " " + bulkTime);
                    bulkTime = nextTime;
                    dfv.setTime(bulkTime);
                }
            }
            tablet.updatePersistedTime(bulkTime, paths, tid);
        }
    }
    synchronized (tablet) {
        for (Entry<FileRef, DataFileValue> tpath : paths.entrySet()) {
            if (datafileSizes.containsKey(tpath.getKey())) {
                log.error("Adding file that is already in set {}", tpath.getKey());
            }
            datafileSizes.put(tpath.getKey(), tpath.getValue());
        }
        tablet.getTabletResources().importedMapFiles();
        tablet.computeNumEntries();
    }
    for (Entry<FileRef, DataFileValue> entry : paths.entrySet()) {
        log.debug("TABLET_HIST {} import {} {}", tablet.getExtent(), entry.getKey(), entry.getValue());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) FileRef(org.apache.accumulo.server.fs.FileRef) HashMap(java.util.HashMap) IOException(java.io.IOException)
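Note the directory check above: every file's grandparent directory must be the table directory (a tables dir plus the table id), and all files must share a single bulk directory. A hypothetical invocation with invented paths, sizes, and transaction id:

// Hypothetical bulk import of two files from the same bulk directory (b-0003).
// The paths, sizes, and FATE transaction id are placeholders for illustration.
Map<FileRef, DataFileValue> bulkFiles = new HashMap<>();
bulkFiles.put(new FileRef("hdfs://nn/accumulo/tables/2/b-0003/I0000001.rf"),
        new DataFileValue(200_000L, 10_000L));
bulkFiles.put(new FileRef("hdfs://nn/accumulo/tables/2/b-0003/I0000002.rf"),
        new DataFileValue(150_000L, 8_000L));
long tid = 42L;
// setTime=true stamps each DataFileValue with a new tablet time before persisting.
datafileManager.importMapFiles(tid, bulkFiles, true);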

Example 34 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method reserveFilesForScan.

Pair<Long, Map<FileRef, DataFileValue>> reserveFilesForScan() {
    synchronized (tablet) {
        while (reservationsBlocked) {
            try {
                tablet.wait(50);
            } catch (InterruptedException e) {
                log.warn("{}", e.getMessage(), e);
            }
        }
        Set<FileRef> absFilePaths = new HashSet<>(datafileSizes.keySet());
        long rid = nextScanReservationId++;
        scanFileReservations.put(rid, absFilePaths);
        Map<FileRef, DataFileValue> ret = new HashMap<>();
        for (FileRef path : absFilePaths) {
            fileScanReferenceCounts.increment(path, 1);
            ret.put(path, datafileSizes.get(path));
        }
        return new Pair<>(rid, ret);
    }
}
Also used : DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) FileRef(org.apache.accumulo.server.fs.FileRef) HashMap(java.util.HashMap) HashSet(java.util.HashSet) Pair(org.apache.accumulo.core.util.Pair)
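reserveFilesForScan is the acquire half of the reservation protocol; returnFilesForScan in Example 31 above is the release half. A caller-side sketch of the intended pairing, where openScanIterators is a hypothetical helper standing in for whatever consumes the reserved files:

// Reserve the current files, scan them, and always release the reservation.
Pair<Long, Map<FileRef, DataFileValue>> reservation = datafileManager.reserveFilesForScan();
Long reservationId = reservation.getFirst();
try {
    // The reserved FileRefs stay reference-counted (and undeletable) while the scan runs.
    openScanIterators(reservation.getSecond());
} finally {
    // Drops the ref counts; files queued for deletion are removed once their counts reach zero.
    datafileManager.returnFilesForScan(reservationId);
}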

Example 35 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class MajorCompactionRequest method getSummaries.

/**
 * Returns all summaries present in each file.
 *
 * <p>
 * This method can only be called from {@link CompactionStrategy#gatherInformation(MajorCompactionRequest)}. Unfortunately, {@code gatherInformation()} is not
 * called before {@link CompactionStrategy#shouldCompact(MajorCompactionRequest)}. Therefore {@code shouldCompact()} should just return true when a
 * compaction strategy wants to use summary information.
 *
 * <p>
 * When using summaries to make compaction decisions, it's important to ensure that all summary data fits in the tablet server summary cache. The size of this
 * cache is configured by {@code tserver.cache.summary.size}. Also, it's important to use the summarySelector predicate to only retrieve the needed summary data.
 * Otherwise unneeded summary data could be brought into the cache.
 *
 * <p>
 * Some files may contain data outside of a tablet's range. When {@link Summarizer}s generate small amounts of summary data, multiple summaries may be stored
 * within a file for different row ranges. This allows more accurate summaries to be returned for the case where a file has data outside a tablet's range.
 * However, some summary data outside of the tablet's range may still be included. When this happens, {@link FileStatistics#getExtra()} will be non-zero. Also,
 * it's good to be aware of the other potential causes of inaccuracies; see {@link FileStatistics#getInaccurate()}.
 *
 * <p>
 * When this method is called with multiple files, it will automatically merge summary data using {@link Combiner#merge(Map, Map)}. If summary information is
 * needed for each file, then just call this method for each file.
 *
 * <p>
 * Writing a compaction strategy that uses summary information is a bit tricky. See the source code for {@link TooManyDeletesCompactionStrategy} as an example
 * of a compaction strategy.
 *
 * @see Summarizer
 * @see TableOperations#addSummarizers(String, SummarizerConfiguration...)
 * @see AccumuloFileOutputFormat#setSummarizers(org.apache.hadoop.mapred.JobConf, SummarizerConfiguration...)
 * @see WriterOptions#withSummarizers(SummarizerConfiguration...)
 */
public List<Summary> getSummaries(Collection<FileRef> files, Predicate<SummarizerConfiguration> summarySelector) throws IOException {
    Preconditions.checkState(volumeManager != null, "Getting summaries is not supported at this time.  Its only supported when CompactionStrategy.gatherInformation() is called.");
    SummaryCollection sc = new SummaryCollection();
    SummarizerFactory factory = new SummarizerFactory(tableConfig);
    for (FileRef file : files) {
        FileSystem fs = volumeManager.getVolumeByPath(file.path()).getFileSystem();
        Configuration conf = CachedConfiguration.getInstance();
        SummaryCollection fsc = SummaryReader.load(fs, conf, tableConfig, factory, file.path(), summarySelector, summaryCache, indexCache).getSummaries(Collections.singletonList(new Gatherer.RowRange(extent)));
        sc.merge(fsc, factory);
    }
    return sc.getSummaries();
}
Also used : SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) FileRef(org.apache.accumulo.server.fs.FileRef) FileSystem(org.apache.hadoop.fs.FileSystem) SummarizerFactory(org.apache.accumulo.core.summary.SummarizerFactory) SummaryCollection(org.apache.accumulo.core.summary.SummaryCollection)
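Per the javadoc above, a compaction strategy should call getSummaries only from gatherInformation() and let shouldCompact() return true unconditionally. A rough sketch of that pattern, loosely modeled on what TooManyDeletesCompactionStrategy does; the summarizer class name, statistic keys, and threshold are placeholders:

// Sketch of a CompactionStrategy that consults summary data; the field carries the
// decision from gatherInformation() to getCompactionPlan().
private boolean compactBasedOnSummaries = false;

@Override
public void gatherInformation(MajorCompactionRequest request) throws IOException {
    Predicate<SummarizerConfiguration> selector =
            conf -> conf.getClassName().equals("org.example.MySummarizer"); // hypothetical summarizer
    List<Summary> summaries = request.getSummaries(request.getFiles().keySet(), selector);
    long deletes = 0, total = 0;
    for (Summary summary : summaries) {
        deletes += summary.getStatistics().getOrDefault("deletes", 0L);
        total += summary.getStatistics().getOrDefault("total", 0L);
    }
    // Remember the decision for getCompactionPlan(), which runs after gatherInformation().
    compactBasedOnSummaries = total > 0 && (double) deletes / total > 0.25;
}

@Override
public boolean shouldCompact(MajorCompactionRequest request) {
    // Summary data is not available yet at this point, so defer the real decision.
    return true;
}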

Aggregations

FileRef (org.apache.accumulo.server.fs.FileRef)62 DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue)36 Value (org.apache.accumulo.core.data.Value)17 Key (org.apache.accumulo.core.data.Key)16 ArrayList (java.util.ArrayList)15 HashMap (java.util.HashMap)13 KeyExtent (org.apache.accumulo.core.data.impl.KeyExtent)13 IOException (java.io.IOException)12 Test (org.junit.Test)12 Text (org.apache.hadoop.io.Text)11 Mutation (org.apache.accumulo.core.data.Mutation)10 VolumeManager (org.apache.accumulo.server.fs.VolumeManager)10 Scanner (org.apache.accumulo.core.client.Scanner)9 PartialKey (org.apache.accumulo.core.data.PartialKey)9 TreeMap (java.util.TreeMap)8 FileSystem (org.apache.hadoop.fs.FileSystem)8 Path (org.apache.hadoop.fs.Path)8 HashSet (java.util.HashSet)7 IsolatedScanner (org.apache.accumulo.core.client.IsolatedScanner)6 ScannerImpl (org.apache.accumulo.core.client.impl.ScannerImpl)6