Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.
In class DatafileManager, method returnFilesForScan.
void returnFilesForScan(Long reservationId) {
  final Set<FileRef> filesToDelete = new HashSet<>();
  synchronized (tablet) {
    Set<FileRef> absFilePaths = scanFileReservations.remove(reservationId);
    if (absFilePaths == null)
      throw new IllegalArgumentException("Unknown scan reservation id " + reservationId);
    boolean notify = false;
    for (FileRef path : absFilePaths) {
      long refCount = fileScanReferenceCounts.decrement(path, 1);
      if (refCount == 0) {
        if (filesToDeleteAfterScan.remove(path))
          filesToDelete.add(path);
        notify = true;
      } else if (refCount < 0)
        throw new IllegalStateException("Scan ref count for " + path + " is " + refCount);
    }
    if (notify)
      tablet.notifyAll();
  }
  if (filesToDelete.size() > 0) {
    log.debug("Removing scan refs from metadata {} {}", tablet.getExtent(), filesToDelete);
    MetadataTableUtil.removeScanFiles(tablet.getExtent(), filesToDelete, tablet.getTabletServer(), tablet.getTabletServer().getLock());
  }
}
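Each reservation id comes from a matching reserveFilesForScan call (shown later on this page), and returnFilesForScan should run even when the scan fails. A minimal sketch of the pairing, assuming a hypothetical datafileManager field and a placeholder openScanIterators helper:

// Hypothetical caller; only the two DatafileManager methods are from this page.
Pair<Long, Map<FileRef, DataFileValue>> reservation = datafileManager.reserveFilesForScan();
try {
  // scan over the reserved files; openScanIterators is a placeholder
  openScanIterators(reservation.getSecond());
} finally {
  // always release the reservation so the ref counts drain and files queued
  // in filesToDeleteAfterScan can be removed from the metadata table
  datafileManager.returnFilesForScan(reservation.getFirst());
}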
Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.
In class DatafileManager, method bringMajorCompactionOnline.
void bringMajorCompactionOnline(Set<FileRef> oldDatafiles, FileRef tmpDatafile, FileRef newDatafile, Long compactionId, DataFileValue dfv) throws IOException {
  final KeyExtent extent = tablet.getExtent();
  long t1, t2;
  if (!extent.isRootTablet()) {
    if (tablet.getTabletServer().getFileSystem().exists(newDatafile.path())) {
      log.error("Target map file already exists " + newDatafile, new Exception());
      throw new IllegalStateException("Target map file already exists " + newDatafile);
    }
    // rename before putting in metadata table, so files in metadata table should
    // always exist
    rename(tablet.getTabletServer().getFileSystem(), tmpDatafile.path(), newDatafile.path());
    if (dfv.getNumEntries() == 0) {
      tablet.getTabletServer().getFileSystem().deleteRecursively(newDatafile.path());
    }
  }
  TServerInstance lastLocation = null;
  synchronized (tablet) {
    t1 = System.currentTimeMillis();
    IZooReaderWriter zoo = ZooReaderWriter.getInstance();
    tablet.incrementDataSourceDeletions();
    if (extent.isRootTablet()) {
      waitForScansToFinish(oldDatafiles, true, Long.MAX_VALUE);
      try {
        if (!zoo.isLockHeld(tablet.getTabletServer().getLock().getLockID())) {
          throw new IllegalStateException();
        }
      } catch (Exception e) {
        throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
      }
      // mark files as ready for deletion, but do not delete them until we
      // successfully rename the compacted map file, in case the system goes down
      RootFiles.replaceFiles(tablet.getTableConfiguration(), tablet.getTabletServer().getFileSystem(), tablet.getLocation(), oldDatafiles, tmpDatafile, newDatafile);
    }
    // atomically remove old files and add new file
    for (FileRef oldDatafile : oldDatafiles) {
      if (!datafileSizes.containsKey(oldDatafile)) {
        log.error("file does not exist in set {}", oldDatafile);
      }
      datafileSizes.remove(oldDatafile);
      majorCompactingFiles.remove(oldDatafile);
    }
    if (datafileSizes.containsKey(newDatafile)) {
      log.error("Adding file that is already in set {}", newDatafile);
    }
    if (dfv.getNumEntries() > 0) {
      datafileSizes.put(newDatafile, dfv);
    }
    // could be used by a follow-on compaction in a multi-pass compaction
    majorCompactingFiles.add(newDatafile);
    tablet.computeNumEntries();
    lastLocation = tablet.resetLastLocation();
    tablet.setLastCompactionID(compactionId);
    t2 = System.currentTimeMillis();
  }
  if (!extent.isRootTablet()) {
    Set<FileRef> filesInUseByScans = waitForScansToFinish(oldDatafiles, false, 10000);
    if (filesInUseByScans.size() > 0)
      log.debug("Adding scan refs to metadata {} {}", extent, filesInUseByScans);
    MasterMetadataUtil.replaceDatafiles(tablet.getTabletServer(), extent, oldDatafiles, filesInUseByScans, newDatafile, compactionId, dfv, tablet.getTabletServer().getClientAddressString(), lastLocation, tablet.getTabletServer().getLock());
    removeFilesAfterScan(filesInUseByScans);
  }
  log.debug(String.format("MajC finish lock %.2f secs", (t2 - t1) / 1000.0));
  log.debug("TABLET_HIST {} MajC --> {}", oldDatafiles, newDatafile);
}
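The ordering in the non-root branch is the crash-safety point: the compaction output is renamed from its tmp name to its final name before the metadata table is told about it, so a metadata reference never points at a missing file. A simplified sketch of that ordering using Hadoop's raw FileSystem API (recordInMetadataTable is a placeholder, not an Accumulo method):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

void commitCompactionOutput(FileSystem fs, Path tmpDatafile, Path newDatafile) throws IOException {
  // Step 1: move the finished output to its permanent name first.
  if (!fs.rename(tmpDatafile, newDatafile)) {
    throw new IOException("rename failed: " + tmpDatafile + " -> " + newDatafile);
  }
  // Step 2: only now record the file in the metadata table. If the process
  // dies between the two steps, the metadata table still references only
  // files that exist; an orphaned renamed file is cheaper to handle than a
  // metadata entry with no file behind it.
  recordInMetadataTable(newDatafile); // placeholder for the metadata update
}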
Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.
In class DatafileManager, method importMapFiles.
public void importMapFiles(long tid, Map<FileRef, DataFileValue> pathsString, boolean setTime) throws IOException {
  String bulkDir = null;
  Map<FileRef, DataFileValue> paths = new HashMap<>();
  for (Entry<FileRef, DataFileValue> entry : pathsString.entrySet()) {
    paths.put(entry.getKey(), entry.getValue());
  }
  for (FileRef tpath : paths.keySet()) {
    boolean inTheRightDirectory = false;
    Path parent = tpath.path().getParent().getParent();
    for (String tablesDir : ServerConstants.getTablesDirs()) {
      if (parent.equals(new Path(tablesDir, tablet.getExtent().getTableId().canonicalID()))) {
        inTheRightDirectory = true;
        break;
      }
    }
    if (!inTheRightDirectory) {
      throw new IOException("Data file " + tpath + " not in table dirs");
    }
    if (bulkDir == null)
      bulkDir = tpath.path().getParent().toString();
    else if (!bulkDir.equals(tpath.path().getParent().toString()))
      throw new IllegalArgumentException("bulk files in different dirs " + bulkDir + " " + tpath);
  }
  if (tablet.getExtent().isMeta()) {
    throw new IllegalArgumentException("Can not import files to a metadata tablet");
  }
  synchronized (bulkFileImportLock) {
    if (paths.size() > 0) {
      long bulkTime = Long.MIN_VALUE;
      if (setTime) {
        for (DataFileValue dfv : paths.values()) {
          long nextTime = tablet.getAndUpdateTime();
          if (nextTime < bulkTime)
            throw new IllegalStateException("Time went backwards unexpectedly " + nextTime + " " + bulkTime);
          bulkTime = nextTime;
          dfv.setTime(bulkTime);
        }
      }
      tablet.updatePersistedTime(bulkTime, paths, tid);
    }
  }
  synchronized (tablet) {
    for (Entry<FileRef, DataFileValue> tpath : paths.entrySet()) {
      if (datafileSizes.containsKey(tpath.getKey())) {
        log.error("Adding file that is already in set {}", tpath.getKey());
      }
      datafileSizes.put(tpath.getKey(), tpath.getValue());
    }
    tablet.getTabletResources().importedMapFiles();
    tablet.computeNumEntries();
  }
  for (Entry<FileRef, DataFileValue> entry : paths.entrySet()) {
    log.debug("TABLET_HIST {} import {} {}", tablet.getExtent(), entry.getKey(), entry.getValue());
  }
}
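The double getParent() call reflects the expected bulk-import layout: each file must sit in a bulk directory directly under the table's directory on some volume. A toy illustration of the same check in isolation, using hypothetical paths:

import org.apache.hadoop.fs.Path;

Path tpath = new Path("/accumulo/tables/3/b-0000001/I0000002.rf"); // hypothetical bulk file
Path bulkDir = tpath.getParent();               // /accumulo/tables/3/b-0000001
Path tableDir = tpath.getParent().getParent();  // /accumulo/tables/3
// importMapFiles accepts the file only if tableDir matches one of the
// configured tables directories joined with this tablet's table id
boolean inTheRightDirectory = tableDir.equals(new Path("/accumulo/tables", "3"));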
Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.
In class DatafileManager, method reserveFilesForScan.
Pair<Long, Map<FileRef, DataFileValue>> reserveFilesForScan() {
  synchronized (tablet) {
    while (reservationsBlocked) {
      try {
        tablet.wait(50);
      } catch (InterruptedException e) {
        log.warn("{}", e.getMessage(), e);
      }
    }
    Set<FileRef> absFilePaths = new HashSet<>(datafileSizes.keySet());
    long rid = nextScanReservationId++;
    scanFileReservations.put(rid, absFilePaths);
    Map<FileRef, DataFileValue> ret = new HashMap<>();
    for (FileRef path : absFilePaths) {
      fileScanReferenceCounts.increment(path, 1);
      ret.put(path, datafileSizes.get(path));
    }
    return new Pair<>(rid, ret);
  }
}
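The reservationsBlocked flag is one half of a wait/notify handshake on the tablet monitor: some other code path sets it, waits for outstanding scan references to drain, then clears it and calls tablet.notifyAll(). A hedged sketch of what that blocking side could look like (the real counterpart in this class is waitForScansToFinish and differs in detail; the hasEntries accessor is an assumption):

synchronized (tablet) {
  reservationsBlocked = true; // new reservations now park in reserveFilesForScan
  try {
    // wait until every reserved file has been returned; the accessor is hypothetical
    while (fileScanReferenceCounts.hasEntries()) {
      tablet.wait(50);
    }
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
  } finally {
    reservationsBlocked = false;
    tablet.notifyAll(); // wake threads parked in reserveFilesForScan
  }
}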
Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.
In class MajorCompactionRequest, method getSummaries.
/**
 * Returns all summaries present in each file.
 *
 * <p>
 * This method can only be called from {@link CompactionStrategy#gatherInformation(MajorCompactionRequest)}. Unfortunately, {@code gatherInformation()} is not
 * called before {@link CompactionStrategy#shouldCompact(MajorCompactionRequest)}. Therefore {@code shouldCompact()} should just return true when a
 * compaction strategy wants to use summary information.
 *
 * <p>
 * When using summaries to make compaction decisions, it's important to ensure that all summary data fits in the tablet server summary cache. The size of this
 * cache is configured by {@code tserver.cache.summary.size}. It's also important to use the summarySelector predicate to retrieve only the needed summary
 * data; otherwise unneeded summary data could be brought into the cache.
 *
 * <p>
 * Some files may contain data outside of a tablet's range. When {@link Summarizer}s generate small amounts of summary data, multiple summaries may be stored
 * within a file for different row ranges. This allows more accurate summaries to be returned in the case where a file has data outside a tablet's range.
 * However, some summary data outside of the tablet's range may still be included. When this happens, {@link FileStatistics#getExtra()} will be non-zero. It's
 * also good to be aware of the other potential causes of inaccuracies: see {@link FileStatistics#getInaccurate()}.
 *
 * <p>
 * When this method is called with multiple files, it will automatically merge summary data using {@link Combiner#merge(Map, Map)}. If summary information is
 * needed for each file, then just call this method for each file.
 *
 * <p>
 * Writing a compaction strategy that uses summary information is a bit tricky. See the source code of {@link TooManyDeletesCompactionStrategy} for an example.
 *
 * @see Summarizer
 * @see TableOperations#addSummarizers(String, SummarizerConfiguration...)
 * @see AccumuloFileOutputFormat#setSummarizers(org.apache.hadoop.mapred.JobConf, SummarizerConfiguration...)
 * @see WriterOptions#withSummarizers(SummarizerConfiguration...)
 */
public List<Summary> getSummaries(Collection<FileRef> files, Predicate<SummarizerConfiguration> summarySelector) throws IOException {
  Preconditions.checkState(volumeManager != null,
      "Getting summaries is not supported at this time. It's only supported when CompactionStrategy.gatherInformation() is called.");
  SummaryCollection sc = new SummaryCollection();
  SummarizerFactory factory = new SummarizerFactory(tableConfig);
  for (FileRef file : files) {
    FileSystem fs = volumeManager.getVolumeByPath(file.path()).getFileSystem();
    Configuration conf = CachedConfiguration.getInstance();
    SummaryCollection fsc = SummaryReader.load(fs, conf, tableConfig, factory, file.path(), summarySelector, summaryCache, indexCache)
        .getSummaries(Collections.singletonList(new Gatherer.RowRange(extent)));
    sc.merge(fsc, factory);
  }
  return sc.getSummaries();
}
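Putting the Javadoc's advice together, a strategy opts in from shouldCompact and defers the real decision until gatherInformation has run. A hedged sketch in the spirit of TooManyDeletesCompactionStrategy; the DeletesSummarizer statistic names ("deletes", "total") and the 25% threshold are assumptions made for illustration:

import java.io.IOException;
import java.util.List;
import org.apache.accumulo.core.client.summary.Summary;

public class DeleteHeavyCompactionStrategy extends CompactionStrategy {

  private boolean compact = false;

  @Override
  public boolean shouldCompact(MajorCompactionRequest request) {
    // Summaries are not available yet (see the Javadoc above), so opt in
    // here and make the real decision in gatherInformation().
    return true;
  }

  @Override
  public void gatherInformation(MajorCompactionRequest request) throws IOException {
    List<Summary> summaries = request.getSummaries(request.getFiles().keySet(),
        conf -> conf.getClassName().contains("DeletesSummarizer"));
    for (Summary summary : summaries) {
      long deletes = summary.getStatistics().getOrDefault("deletes", 0L);
      long total = summary.getStatistics().getOrDefault("total", 0L);
      // assumed policy: compact when more than 25% of the entries are deletes
      if (total > 0 && deletes > total / 4) {
        compact = true;
      }
    }
  }

  @Override
  public CompactionPlan getCompactionPlan(MajorCompactionRequest request) {
    if (!compact)
      return null;
    CompactionPlan plan = new CompactionPlan();
    plan.inputFiles.addAll(request.getFiles().keySet());
    return plan;
  }
}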