Example 6 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method waitForScansToFinish.

private TreeSet<FileRef> waitForScansToFinish(Set<FileRef> pathsToWaitFor, boolean blockNewScans, long maxWaitTime) {
    long startTime = System.currentTimeMillis();
    TreeSet<FileRef> inUse = new TreeSet<>();
    Span waitForScans = Trace.start("waitForScans");
    try {
        synchronized (tablet) {
            if (blockNewScans) {
                if (reservationsBlocked)
                    throw new IllegalStateException();
                reservationsBlocked = true;
            }
            for (FileRef path : pathsToWaitFor) {
                while (fileScanReferenceCounts.get(path) > 0 && System.currentTimeMillis() - startTime < maxWaitTime) {
                    try {
                        tablet.wait(100);
                    } catch (InterruptedException e) {
                        log.warn("{}", e.getMessage(), e);
                    }
                }
            }
            for (FileRef path : pathsToWaitFor) {
                if (fileScanReferenceCounts.get(path) > 0)
                    inUse.add(path);
            }
            if (blockNewScans) {
                reservationsBlocked = false;
                tablet.notifyAll();
            }
        }
    } finally {
        waitForScans.stop();
    }
    return inUse;
}
Also used : FileRef (org.apache.accumulo.server.fs.FileRef), TreeSet (java.util.TreeSet), Span (org.apache.accumulo.core.trace.Span)
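
For context, the method above is a bounded wait on a shared monitor guarded by per-file reference counts: it polls in 100 ms slices until each file's count drops to zero or the deadline passes, then reports whatever is still in use. Below is a minimal, self-contained sketch of that pattern; the ScanRefWaiter class and all of its names are hypothetical illustrations, not Accumulo API.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class ScanRefWaiter {
    private final Object lock = new Object();
    private final Map<String, Integer> refCounts = new HashMap<>();

    void scanStarted(String file) {
        synchronized (lock) {
            refCounts.merge(file, 1, Integer::sum);
        }
    }

    void scanFinished(String file) {
        synchronized (lock) {
            refCounts.merge(file, -1, Integer::sum);
            // wake any thread waiting for counts to drop
            lock.notifyAll();
        }
    }

    // returns the subset of files still referenced when the deadline passes
    Set<String> waitForScans(Set<String> files, long maxWaitMillis) throws InterruptedException {
        long deadline = System.currentTimeMillis() + maxWaitMillis;
        Set<String> inUse = new HashSet<>();
        synchronized (lock) {
            for (String file : files) {
                // wait in short slices so a missed notify cannot block forever
                while (refCounts.getOrDefault(file, 0) > 0 && System.currentTimeMillis() < deadline) {
                    lock.wait(100);
                }
            }
            for (String file : files) {
                if (refCounts.getOrDefault(file, 0) > 0) {
                    inUse.add(file);
                }
            }
        }
        return inUse;
    }
}

The real method additionally blocks new scan reservations for the duration of the wait, which is what the reservationsBlocked flag above implements.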

Example 7 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method reserveMergingMinorCompactionFile.

FileRef reserveMergingMinorCompactionFile() {
    if (mergingMinorCompactionFile != null)
        throw new IllegalStateException("Tried to reserve merging minor compaction file when already reserved  : " + mergingMinorCompactionFile);
    if (tablet.getExtent().isRootTablet())
        return null;
    int maxFiles = tablet.getTableConfiguration().getMaxFilesPerTablet();
    // if a major compaction is running and the tablet is already at its max file count, do not
    // merge; the major compaction will reduce the number of files
    if (majorCompactingFiles.size() > 0 && datafileSizes.size() == maxFiles)
        return null;
    if (datafileSizes.size() >= maxFiles) {
        // find the smallest file
        long maxFileSize = Long.MAX_VALUE;
        maxMergingMinorCompactionFileSize = ConfigurationTypeHelper.getFixedMemoryAsBytes(tablet.getTableConfiguration().get(Property.TABLE_MINC_MAX_MERGE_FILE_SIZE));
        if (maxMergingMinorCompactionFileSize > 0) {
            maxFileSize = maxMergingMinorCompactionFileSize;
        }
        long min = maxFileSize;
        FileRef minName = null;
        for (Entry<FileRef, DataFileValue> entry : datafileSizes.entrySet()) {
            if (entry.getValue().getSize() <= min && !majorCompactingFiles.contains(entry.getKey())) {
                min = entry.getValue().getSize();
                minName = entry.getKey();
            }
        }
        if (minName == null)
            return null;
        mergingMinorCompactionFile = minName;
        return minName;
    }
    return null;
}
Also used : DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue), FileRef (org.apache.accumulo.server.fs.FileRef)
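
The core of the method above is a selection rule: among files not claimed by a running major compaction, pick the smallest one at or below a configured size cap. Here is a standalone sketch of just that rule; the class and parameter names are hypothetical, not Accumulo API.

import java.util.Map;
import java.util.Set;

final class SmallestFilePicker {
    // returns null when every eligible file exceeds the cap, mirroring the method above
    static String pickSmallest(Map<String, Long> fileSizes, Set<String> majorCompacting, long maxMergeFileSize) {
        // a cap of zero or less means "no limit", as with TABLE_MINC_MAX_MERGE_FILE_SIZE above
        long min = maxMergeFileSize > 0 ? maxMergeFileSize : Long.MAX_VALUE;
        String minName = null;
        for (Map.Entry<String, Long> entry : fileSizes.entrySet()) {
            // <= keeps the last qualifying entry on ties, matching the loop above
            if (entry.getValue() <= min && !majorCompacting.contains(entry.getKey())) {
                min = entry.getValue();
                minName = entry.getKey();
            }
        }
        return minName;
    }
}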

Example 8 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method bringMinorCompactionOnline.

void bringMinorCompactionOnline(FileRef tmpDatafile, FileRef newDatafile, FileRef absMergeFile, DataFileValue dfv, CommitSession commitSession, long flushId) throws IOException {
    IZooReaderWriter zoo = ZooReaderWriter.getInstance();
    if (tablet.getExtent().isRootTablet()) {
        try {
            if (!zoo.isLockHeld(tablet.getTabletServer().getLock().getLockID())) {
                throw new IllegalStateException();
            }
        } catch (Exception e) {
            throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
        }
    }
    // rename before putting in metadata table, so files in metadata table should always exist
    do {
        try {
            if (dfv.getNumEntries() == 0) {
                tablet.getTabletServer().getFileSystem().deleteRecursively(tmpDatafile.path());
            } else {
                if (tablet.getTabletServer().getFileSystem().exists(newDatafile.path())) {
                    log.warn("Target map file already exist {}", newDatafile);
                    tablet.getTabletServer().getFileSystem().deleteRecursively(newDatafile.path());
                }
                rename(tablet.getTabletServer().getFileSystem(), tmpDatafile.path(), newDatafile.path());
            }
            break;
        } catch (IOException ioe) {
            log.warn("Tablet " + tablet.getExtent() + " failed to rename " + newDatafile + " after MinC, will retry in 60 secs...", ioe);
            sleepUninterruptibly(1, TimeUnit.MINUTES);
        }
    } while (true);
    long t1, t2;
    // the code below always assumes merged files are in use by scans... this must be done
    // because the in memory list of files is not updated until after the metadata table
    // therefore the file is available to scans until memory is updated, but want to ensure
    // the file is not available for garbage collection... if memory were updated
    // before this point (like major compactions do), then the following code could wait
    // for scans to finish like major compactions do.... used to wait for scans to finish
    // here, but that was incorrect because a scan could start after waiting but before
    // memory was updated... assuming the file is always in use by scans leads to
    // one unneeded metadata update when it was not actually in use
    Set<FileRef> filesInUseByScans = Collections.emptySet();
    if (absMergeFile != null)
        filesInUseByScans = Collections.singleton(absMergeFile);
    // this metadata write does not go up... it goes sideways or to itself
    if (absMergeFile != null)
        MetadataTableUtil.addDeleteEntries(tablet.getExtent(), Collections.singleton(absMergeFile), tablet.getTabletServer());
    Set<String> unusedWalLogs = tablet.beginClearingUnusedLogs();
    boolean replicate = ReplicationConfigurationUtil.isEnabled(tablet.getExtent(), tablet.getTableConfiguration());
    Set<String> logFileOnly = null;
    if (replicate) {
        // unusedWalLogs is of the form host/fileURI, need to strip off the host portion
        logFileOnly = new HashSet<>();
        for (String unusedWalLog : unusedWalLogs) {
            int index = unusedWalLog.indexOf('/');
            if (-1 == index) {
                log.warn("Could not find host component to strip from DFSLogger representation of WAL");
            } else {
                unusedWalLog = unusedWalLog.substring(index + 1);
            }
            logFileOnly.add(unusedWalLog);
        }
    }
    try {
        // the order of writing to metadata and walog is important in the face of machine/process failures
        // need to write to metadata before writing to walog, when things are done in the reverse order
        // data could be lost... the minor compaction start event should be written before the following metadata
        // write is made
        tablet.updateTabletDataFile(commitSession.getMaxCommittedTime(), newDatafile, absMergeFile, dfv, unusedWalLogs, filesInUseByScans, flushId);
        // tablet is online and thus these WALs are referenced by that tablet. Therefore, the WAL replication status cannot be 'closed'.
        if (replicate) {
            if (log.isDebugEnabled()) {
                log.debug("Recording that data has been ingested into {} using {}", tablet.getExtent(), logFileOnly);
            }
            for (String logFile : logFileOnly) {
                ReplicationTableUtil.updateFiles(tablet.getTabletServer(), tablet.getExtent(), logFile, StatusUtil.openWithUnknownLength());
            }
        }
    } finally {
        tablet.finishClearingUnusedLogs();
    }
    do {
        try {
            // the purpose of making this update use the new commit session, instead of the old one passed in,
            // is because the new one will reference the logs used by current memory...
            tablet.getTabletServer().minorCompactionFinished(tablet.getTabletMemory().getCommitSession(), newDatafile.toString(), commitSession.getWALogSeq() + 2);
            break;
        } catch (IOException e) {
            log.error("Failed to write to write-ahead log " + e.getMessage() + " will retry", e);
            sleepUninterruptibly(1, TimeUnit.SECONDS);
        }
    } while (true);
    synchronized (tablet) {
        t1 = System.currentTimeMillis();
        if (datafileSizes.containsKey(newDatafile)) {
            log.error("Adding file that is already in set {}", newDatafile);
        }
        if (dfv.getNumEntries() > 0) {
            datafileSizes.put(newDatafile, dfv);
        }
        if (absMergeFile != null) {
            datafileSizes.remove(absMergeFile);
        }
        unreserveMergingMinorCompactionFile(absMergeFile);
        tablet.flushComplete(flushId);
        t2 = System.currentTimeMillis();
    }
    // must do this after list of files in memory is updated above
    removeFilesAfterScan(filesInUseByScans);
    if (absMergeFile != null)
        log.debug("TABLET_HIST {} MinC [{},memory] -> {}", tablet.getExtent(), absMergeFile, newDatafile);
    else
        log.debug("TABLET_HIST {} MinC [memory] -> {}", tablet.getExtent(), newDatafile);
    log.debug(String.format("MinC finish lock %.2f secs %s", (t2 - t1) / 1000.0, tablet.getExtent().toString()));
    long splitSize = tablet.getTableConfiguration().getAsBytes(Property.TABLE_SPLIT_THRESHOLD);
    if (dfv.getSize() > splitSize) {
        log.debug(String.format("Minor Compaction wrote out file larger than split threshold.  split threshold = %,d  file size = %,d", splitSize, dfv.getSize()));
    }
}
Also used : FileRef (org.apache.accumulo.server.fs.FileRef), IZooReaderWriter (org.apache.accumulo.fate.zookeeper.IZooReaderWriter), IOException (java.io.IOException)
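
Two loops in the method above use the same idiom: retry an idempotent I/O step indefinitely, sleeping between attempts. The sketch below shows that idiom with a hypothetical IoAction interface standing in for Accumulo's internals; the real code uses Guava-style sleepUninterruptibly, which is approximated here by restoring the interrupt flag and continuing.

import java.io.IOException;

final class RetryForever {
    interface IoAction {
        void run() throws IOException;
    }

    static void runUntilSuccess(IoAction action, long sleepMillis) {
        while (true) {
            try {
                action.run();
                return; // success, stop retrying
            } catch (IOException ioe) {
                System.err.println("I/O failed, will retry: " + ioe.getMessage());
                try {
                    Thread.sleep(sleepMillis);
                } catch (InterruptedException ie) {
                    // approximate sleepUninterruptibly: record the interrupt and keep retrying
                    Thread.currentThread().interrupt();
                }
            }
        }
    }
}

A loop like this is only safe when the retried step is idempotent, which is why the rename branch above first checks whether the target file already exists and deletes it before renaming.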

Example 9 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class DatafileManager method removeFilesAfterScan.

void removeFilesAfterScan(Set<FileRef> scanFiles) {
    if (scanFiles.size() == 0)
        return;
    Set<FileRef> filesToDelete = new HashSet<>();
    synchronized (tablet) {
        for (FileRef path : scanFiles) {
            if (fileScanReferenceCounts.get(path) == 0)
                filesToDelete.add(path);
            else
                filesToDeleteAfterScan.add(path);
        }
    }
    if (filesToDelete.size() > 0) {
        log.debug("Removing scan refs from metadata {} {}", tablet.getExtent(), filesToDelete);
        MetadataTableUtil.removeScanFiles(tablet.getExtent(), filesToDelete, tablet.getTabletServer(), tablet.getTabletServer().getLock());
    }
}
Also used : FileRef (org.apache.accumulo.server.fs.FileRef), HashSet (java.util.HashSet)
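
The method above defers deletion for any file a scan still holds. Below is a minimal sketch of that deferred-delete pattern; all names are hypothetical, and the delete method stands in for the metadata update that MetadataTableUtil.removeScanFiles performs.

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class DeferredFileDeleter {
    private final Object lock = new Object();
    private final Map<String, Integer> scanRefs = new HashMap<>();
    private final Set<String> deleteAfterScan = new HashSet<>();

    void scanStarted(String file) {
        synchronized (lock) {
            scanRefs.merge(file, 1, Integer::sum);
        }
    }

    // delete immediately what no scan holds; park the rest until released
    void removeAfterScan(Set<String> files) {
        Set<String> deleteNow = new HashSet<>();
        synchronized (lock) {
            for (String file : files) {
                if (scanRefs.getOrDefault(file, 0) == 0) {
                    deleteNow.add(file);
                } else {
                    deleteAfterScan.add(file);
                }
            }
        }
        if (!deleteNow.isEmpty()) {
            delete(deleteNow);
        }
    }

    void scanFinished(String file) {
        boolean dropNow;
        synchronized (lock) {
            int count = scanRefs.merge(file, -1, Integer::sum);
            dropNow = count == 0 && deleteAfterScan.remove(file);
        }
        if (dropNow) {
            delete(Collections.singleton(file));
        }
    }

    private void delete(Set<String> files) {
        // stand-in for removing the scan file entries from the metadata table
        System.out.println("deleting " + files);
    }
}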

Example 10 with FileRef

use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

the class MinorCompactionTask method run.

@Override
public void run() {
    tablet.minorCompactionStarted();
    ProbabilitySampler sampler = new ProbabilitySampler(tracePercent);
    Span minorCompaction = Trace.on("minorCompaction", sampler);
    try {
        FileRef newMapfileLocation = tablet.getNextMapFilename(mergeFile == null ? "F" : "M");
        FileRef tmpFileRef = new FileRef(newMapfileLocation.path() + "_tmp");
        Span span = Trace.start("waitForCommits");
        synchronized (tablet) {
            commitSession.waitForCommitsToFinish();
        }
        span.stop();
        span = Trace.start("start");
        while (true) {
            try {
                // the purpose of the minor compaction start event is to keep track of the filename... in the case
                // where the metadata table write for the minor compaction finishes and the process dies before
                // writing the minor compaction finish event, then the start event+filename in metadata table will
                // prevent recovery of duplicate data... the minor compaction start event could be written at any time
                // before the metadata write for the minor compaction
                tablet.getTabletServer().minorCompactionStarted(commitSession, commitSession.getWALogSeq() + 1, newMapfileLocation.path().toString());
                break;
            } catch (IOException e) {
                log.warn("Failed to write to write ahead log {}", e.getMessage(), e);
            }
        }
        span.stop();
        span = Trace.start("compact");
        this.stats = tablet.minorCompact(tablet.getTabletServer().getFileSystem(), tablet.getTabletMemory().getMinCMemTable(), tmpFileRef, newMapfileLocation, mergeFile, true, queued, commitSession, flushId, mincReason);
        span.stop();
        minorCompaction.data("extent", tablet.getExtent().toString());
        minorCompaction.data("numEntries", Long.toString(this.stats.getNumEntries()));
        minorCompaction.data("size", Long.toString(this.stats.getSize()));
        minorCompaction.stop();
        if (tablet.needsSplit()) {
            tablet.getTabletServer().executeSplit(tablet);
        } else {
            tablet.initiateMajorCompaction(MajorCompactionReason.NORMAL);
        }
    } catch (Throwable t) {
        log.error("Unknown error during minor compaction for extent: " + tablet.getExtent(), t);
        throw new RuntimeException(t);
    } finally {
        tablet.minorCompactionComplete();
        minorCompaction.stop();
    }
}
Also used : ProbabilitySampler (org.apache.accumulo.core.trace.ProbabilitySampler), FileRef (org.apache.accumulo.server.fs.FileRef), IOException (java.io.IOException), Span (org.apache.accumulo.core.trace.Span)
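
The task above brackets each phase of the compaction in its own trace span and stops the enclosing span in a finally block so a thrown error cannot leak it. The sketch below shows that discipline with a toy Span class; it is not the Accumulo trace API, just the shape of its use.

class TracedPhases {
    static final class Span {
        private final String name;
        private final long start = System.nanoTime();
        Span(String name) { this.name = name; }
        void stop() {
            System.out.printf("%s took %d us%n", name, (System.nanoTime() - start) / 1000);
        }
    }

    void run() {
        Span whole = new Span("minorCompaction");
        try {
            Span phase = new Span("waitForCommits");
            // ... wait for commits to finish ...
            phase.stop();

            phase = new Span("compact");
            // ... write out the new file ...
            phase.stop();
        } finally {
            // runs even if a phase throws, so the outer span is always closed
            whole.stop();
        }
    }
}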

Aggregations

FileRef (org.apache.accumulo.server.fs.FileRef): 62
DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue): 36
Value (org.apache.accumulo.core.data.Value): 17
Key (org.apache.accumulo.core.data.Key): 16
ArrayList (java.util.ArrayList): 15
HashMap (java.util.HashMap): 13
KeyExtent (org.apache.accumulo.core.data.impl.KeyExtent): 13
IOException (java.io.IOException): 12
Test (org.junit.Test): 12
Text (org.apache.hadoop.io.Text): 11
Mutation (org.apache.accumulo.core.data.Mutation): 10
VolumeManager (org.apache.accumulo.server.fs.VolumeManager): 10
Scanner (org.apache.accumulo.core.client.Scanner): 9
PartialKey (org.apache.accumulo.core.data.PartialKey): 9
TreeMap (java.util.TreeMap): 8
FileSystem (org.apache.hadoop.fs.FileSystem): 8
Path (org.apache.hadoop.fs.Path): 8
HashSet (java.util.HashSet): 7
IsolatedScanner (org.apache.accumulo.core.client.IsolatedScanner): 6
ScannerImpl (org.apache.accumulo.core.client.impl.ScannerImpl): 6