use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.
the class DatafileManager method bringMajorCompactionOnline.
/**
 * Swaps the input files of a finished major compaction for its output file, first in this
 * manager's in-memory file map (under the tablet lock) and then in the metadata table.
 *
 * @param oldDatafiles the files that were compacted; all of them must currently be tracked in
 *        {@code datafileSizes}
 * @param tmpDatafile the temporary file the compaction wrote; it is renamed to its final
 *        destination, or deleted when the compaction produced no entries
 * @param compactionId id to record for the compaction, may be {@code null}; it is only written
 *        once none of {@code selectedFiles} remain in the tablet's file set
 * @param selectedFiles files compared against the tablet's remaining files to decide whether
 *        {@code compactionId} is written (presumably the files selected for that compaction -
 *        confirm against callers)
 * @param dfv size information for the new file; {@code getNumEntries() == 0} is treated as an
 *        empty compaction result
 * @param ecid passed through unchanged to the metadata update
 * @return the stored form of the destination file; note it is returned even when the compaction
 *         result was empty and the file was not added to the in-memory map
 * @throws IOException declared by this method; presumably raised by the volume manager calls
 * @throws IllegalStateException if the destination file already exists, or if one of the
 *         precondition checks on the tablet's current file set fails
 */
StoredTabletFile bringMajorCompactionOnline(Set<StoredTabletFile> oldDatafiles, TabletFile tmpDatafile, Long compactionId, Set<StoredTabletFile> selectedFiles, DataFileValue dfv, Optional<ExternalCompactionId> ecid) throws IOException {
final KeyExtent extent = tablet.getExtent();
VolumeManager vm = tablet.getTabletServer().getContext().getVolumeManager();
// t1/t2 bracket the time spent holding the tablet lock; reported by the trace log at the end
long t1, t2;
TabletFile newDatafile = CompactableUtils.computeCompactionFileDest(tmpDatafile);
// never overwrite an existing file at the destination
if (vm.exists(newDatafile.getPath())) {
log.error("Target map file already exist " + newDatafile, new Exception());
throw new IllegalStateException("Target map file already exist " + newDatafile);
}
if (dfv.getNumEntries() == 0) {
// empty compaction result: there is nothing to keep, so discard the temporary file
vm.deleteRecursively(tmpDatafile.getPath());
} else {
// rename before putting in metadata table, so files in metadata table should
// always exist
rename(vm, tmpDatafile.getPath(), newDatafile.getPath());
}
TServerInstance lastLocation = null;
// calling insert to get the new file before inserting into the metadata
StoredTabletFile newFile = newDatafile.insert();
Long compactionIdToWrite = null;
// update the in-memory view of the tablet's files while holding the tablet lock
synchronized (tablet) {
t1 = System.currentTimeMillis();
Preconditions.checkState(datafileSizes.keySet().containsAll(oldDatafiles), "Compacted files %s are not a subset of tablet files %s", oldDatafiles, datafileSizes.keySet());
if (dfv.getNumEntries() > 0) {
Preconditions.checkState(!datafileSizes.containsKey(newFile), "New compaction file %s already exist in tablet files %s", newFile, datafileSizes.keySet());
}
tablet.incrementDataSourceDeletions();
datafileSizes.keySet().removeAll(oldDatafiles);
if (dfv.getNumEntries() > 0) {
datafileSizes.put(newFile, dfv);
// could be used by a follow on compaction in a multipass compaction
}
updateCount++;
tablet.computeNumEntries();
lastLocation = tablet.resetLastLocation();
// only record the compaction id once none of the selected files remain in the tablet
if (compactionId != null && Collections.disjoint(selectedFiles, datafileSizes.keySet())) {
compactionIdToWrite = compactionId;
}
t2 = System.currentTimeMillis();
}
// known consistency issue between minor and major compactions - see ACCUMULO-18
// NOTE(review): judging by its name and the log message below, waitForScansToFinish returns
// the old files that scans still reference - confirm against its implementation
Set<StoredTabletFile> filesInUseByScans = waitForScansToFinish(oldDatafiles);
if (!filesInUseByScans.isEmpty())
log.debug("Adding scan refs to metadata {} {}", extent, filesInUseByScans);
// the metadata update runs outside the tablet lock, after the in-memory state was changed
ManagerMetadataUtil.replaceDatafiles(tablet.getContext(), extent, oldDatafiles, filesInUseByScans, newFile, compactionIdToWrite, dfv, tablet.getTabletServer().getClientAddressString(), lastLocation, tablet.getTabletServer().getLock(), ecid);
// compactionIdToWrite may still be null here; it is passed through as-is
tablet.setLastCompactionID(compactionIdToWrite);
removeFilesAfterScan(filesInUseByScans);
if (log.isTraceEnabled()) {
log.trace(String.format("MajC finish lock %.2f secs", (t2 - t1) / 1000.0));
}
return newFile;
}
use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.
the class DatafileManager method reserveFilesForScan.
/**
 * Reserves the tablet's current set of files for a scan.
 *
 * <p>
 * While holding the tablet lock, a copy of the current file set is recorded under a freshly
 * allocated reservation id and the scan reference count of every file in that copy is
 * incremented by one.
 *
 * @return a pair of the reservation id and a map from each reserved file to the
 *         {@code DataFileValue} currently tracked for it
 */
Pair<Long, Map<TabletFile, DataFileValue>> reserveFilesForScan() {
  synchronized (tablet) {
    // copy the key set so the reservation is unaffected by later changes to datafileSizes
    final Set<StoredTabletFile> reservedFiles = new HashSet<>(datafileSizes.keySet());
    final long reservationId = nextScanReservationId++;
    scanFileReservations.put(reservationId, reservedFiles);

    final Map<TabletFile, DataFileValue> filesWithSizes = new HashMap<>();
    for (StoredTabletFile file : reservedFiles) {
      fileScanReferenceCounts.increment(file, 1);
      filesWithSizes.put(file, datafileSizes.get(file));
    }

    return new Pair<>(reservationId, filesWithSizes);
  }
}
use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.
the class DatafileManager method importMapFiles.
/**
 * Adds bulk import files to this tablet.
 *
 * <p>
 * The work happens in three phases: every path is validated (it must live two levels below one
 * of the table directories for this tablet's table, and all paths must share one tablet
 * directory), then the files are persisted via {@code tablet.updatePersistedTime} while holding
 * the bulk import lock, and finally the in-memory file map is updated under the tablet lock.
 *
 * @param tid passed through to {@code tablet.updatePersistedTime}
 * @param paths files to import mapped to their size information; when {@code setTime} is true
 *        the time of each value is overwritten
 * @param setTime whether to assign each file a time obtained from the tablet
 * @return the stored files that were added; empty when {@code paths} is empty
 * @throws IOException if a file is not located under one of the table directories
 * @throws IllegalArgumentException if the files are in different directories or this tablet is a
 *         metadata tablet
 * @throws IllegalStateException if the tablet hands out a time smaller than the previous one
 */
public Collection<StoredTabletFile> importMapFiles(long tid, Map<TabletFile, DataFileValue> paths, boolean setTime) throws IOException {
  String commonDir = null;
  // once tablet files are inserted into the metadata they will become StoredTabletFiles
  Map<StoredTabletFile, DataFileValue> imported = new HashMap<>(paths.size());

  // phase 1: validate the location of every file before changing any state
  for (TabletFile candidate : paths.keySet()) {
    Path tableLevelDir = candidate.getPath().getParent().getParent();
    boolean underTableDir = false;
    for (String tablesDir : tablet.getContext().getTablesDirs()) {
      if (tableLevelDir.equals(new Path(tablesDir, tablet.getExtent().tableId().canonical()))) {
        underTableDir = true;
        break;
      }
    }
    if (!underTableDir) {
      throw new IOException("Data file " + candidate + " not in table dirs");
    }

    if (commonDir == null) {
      commonDir = candidate.getTabletDir();
    } else if (!commonDir.equals(candidate.getTabletDir())) {
      throw new IllegalArgumentException("bulk files in different dirs " + commonDir + " " + candidate);
    }
  }

  if (tablet.getExtent().isMeta()) {
    throw new IllegalArgumentException("Can not import files to a metadata tablet");
  }

  // phase 2: assign times (optionally) and persist, serialized by the bulk import lock
  synchronized (bulkFileImportLock) {
    if (!paths.isEmpty()) {
      long bulkTime = Long.MIN_VALUE;
      if (setTime) {
        for (DataFileValue value : paths.values()) {
          long assignedTime = tablet.getAndUpdateTime();
          if (assignedTime < bulkTime) {
            throw new IllegalStateException("Time went backwards unexpectedly " + assignedTime + " " + bulkTime);
          }
          bulkTime = assignedTime;
          value.setTime(bulkTime);
        }
      }
      imported = tablet.updatePersistedTime(bulkTime, paths, tid);
    }
  }

  // phase 3: publish the new files in the in-memory map under the tablet lock
  synchronized (tablet) {
    for (Entry<StoredTabletFile, DataFileValue> added : imported.entrySet()) {
      StoredTabletFile file = added.getKey();
      if (datafileSizes.containsKey(file)) {
        log.error("Adding file that is already in set {}", file);
      }
      datafileSizes.put(file, added.getValue());
    }
    updateCount++;
    tablet.getTabletResources().importedMapFiles();
    tablet.computeNumEntries();
  }

  for (StoredTabletFile file : imported.keySet()) {
    TabletLogger.bulkImported(tablet.getExtent(), file);
  }
  return imported.keySet();
}
use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.
the class CompactableUtilsTest method testEquivalence.
/**
 * Checks that {@code CompactableUtils.computeCompactionFileDest} maps a temporary compaction
 * file, built by appending {@code _tmp} to the expected file's metadata insert string, to a
 * file equal to the expected one.
 */
@Test
public void testEquivalence() {
  final Path finalPath = new Path("hdfs://localhost:8020/accumulo/tables/2a/default_tablet/F0000070.rf");
  final TabletFile expected = new TabletFile(finalPath);

  final Path temporaryPath = new Path(expected.getMetaInsert() + "_tmp");
  final TabletFile temporary = new TabletFile(temporaryPath);

  final TabletFile actual = CompactableUtils.computeCompactionFileDest(temporary);

  assertEquals(expected, actual);
}
use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.
the class MajorCompactionRequest method getSummaries.
/**
 * Returns all summaries present in the given files, merged into a single list.
 *
 * <p>
 * This method can only be called from
 * {@link CompactionStrategy#gatherInformation(MajorCompactionRequest)}. Unfortunately,
 * {@code gatherInformation()} is not called before
 * {@link CompactionStrategy#shouldCompact(MajorCompactionRequest)}. Therefore
 * {@code shouldCompact()} should just return true when a compaction strategy wants to use
 * summary information.
 *
 * <p>
 * When using summaries to make compaction decisions, it's important to ensure that all summary
 * data fits in the tablet server summary cache. The size of this cache is configured by
 * {@code tserver.cache.summary.size}. Also it's important to use the summarySelector predicate to
 * only retrieve the needed summary data. Otherwise unneeded summary data could be brought into
 * the cache.
 *
 * <p>
 * Some files may contain data outside of a tablet's range. When {@link Summarizer}s generate
 * small amounts of summary data, multiple summaries may be stored within a file for different row
 * ranges. This will allow more accurate summaries to be returned for the case where a file has
 * data outside a tablet's range. However, some summary data outside of the tablet's range may
 * still be included. When this happens {@link FileStatistics#getExtra()} will be non zero. Also,
 * it's good to be aware of the other potential causes of inaccuracies, see
 * {@link FileStatistics#getInaccurate()}.
 *
 * <p>
 * When this method is called with multiple files, it will automatically merge summary data using
 * {@link Combiner#merge(Map, Map)}. If summary information is needed for each file, then just
 * call this method for each file.
 *
 * <p>
 * Writing a compaction strategy that uses summary information is a bit tricky. See the source
 * code for {@link TooManyDeletesCompactionStrategy} as an example of a compaction strategy.
 *
 * @param files the files whose summaries are loaded and merged
 * @param summarySelector selects which summarizer configurations to load summary data for
 * @return the merged summaries of all given files
 * @throws NullPointerException if no volume manager is available, which is reported as summaries
 *         not being supported at this time
 *
 * @see Summarizer
 * @see TableOperations#addSummarizers(String, SummarizerConfiguration...)
 * @see WriterOptions#withSummarizers(SummarizerConfiguration...)
 */
public List<Summary> getSummaries(Collection<StoredTabletFile> files, Predicate<SummarizerConfiguration> summarySelector) {
  Objects.requireNonNull(volumeManager,
      "Getting summaries is not supported at this time. It's only supported when "
          + "CompactionStrategy.gatherInformation() is called.");

  SummarizerFactory summarizerFactory = new SummarizerFactory(tableConfig);
  SummaryCollection merged = new SummaryCollection();

  for (TabletFile file : files) {
    // load this file's summaries restricted to the tablet's extent, then fold them into the total
    SummaryCollection perFile = SummaryReader
        .load(volumeManager.getFileSystemByPath(file.getPath()), context.getHadoopConf(),
            summarizerFactory, file.getPath(), summarySelector, summaryCache, indexCache,
            fileLenCache, context.getCryptoService())
        .getSummaries(Collections.singletonList(new Gatherer.RowRange(extent)));
    merged.merge(perFile, summarizerFactory);
  }

  return merged.getSummaries();
}
Aggregations