Use of org.apache.accumulo.core.metadata.TabletFile in project Accumulo by Apache.
The class CompactableUtils, method computeCompactionFileDest: derives a compaction's final destination file by stripping the "_tmp" suffix from the temporary file it was written to.
public static TabletFile computeCompactionFileDest(TabletFile tmpFile) {
  String newFilePath = tmpFile.getMetaInsert();
  int idx = newFilePath.indexOf("_tmp");
  if (idx > 0) {
    newFilePath = newFilePath.substring(0, idx);
  } else {
    throw new IllegalArgumentException(
        "Expected compaction tmp file " + tmpFile.getMetaInsert() + " to have suffix '_tmp'");
  }
  return new TabletFile(new Path(newFilePath));
}
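A minimal usage sketch of the method above (the HDFS path is made up, and importing CompactableUtils from the tserver module is an assumption):

import org.apache.accumulo.core.metadata.TabletFile;
import org.apache.hadoop.fs.Path;

// Compactions write to "<dest>_tmp" and move the file into place on success;
// this recovers <dest> from the temporary name.
TabletFile tmp = new TabletFile(new Path("hdfs://nn/accumulo/tables/1/t-001/F0001.rf_tmp"));
TabletFile dest = CompactableUtils.computeCompactionFileDest(tmp);
// dest now refers to ".../F0001.rf"; a name without "_tmp" would trigger the
// IllegalArgumentException above.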
Use of org.apache.accumulo.core.metadata.TabletFile in project Accumulo by Apache.
The class ScanDataSource, method createIterator: builds the scan-time iterator stack over a tablet's in-memory and on-disk data, reserving data files under the tablet lock before opening them.
private SortedKeyValueIterator<Key, Value> createIterator() throws IOException {
  Map<TabletFile, DataFileValue> files;
  SamplerConfigurationImpl samplerConfig = scanParams.getSamplerConfigurationImpl();
  synchronized (tablet) {
    if (memIters != null)
      throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");
    if (tablet.isClosed())
      throw new TabletClosedException();
    if (interruptFlag.get())
      throw new IterationInterruptedException(tablet.getExtent() + " " + interruptFlag.hashCode());
    // only acquire the file manager when we know the tablet is open
    if (fileManager == null) {
      fileManager = tablet.getTabletResources().newScanFileManager(scanParams.getScanDispatch());
      tablet.getTabletServer().getScanMetrics().incrementOpenFiles(fileManager.getNumOpenFiles());
      tablet.addActiveScans(this);
    }
    if (fileManager.getNumOpenFiles() != 0)
      throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");
    // set this before trying to get iterators in case
    // getIterators() throws an exception
    expectedDeletionCount = tablet.getDataSourceDeletions();
    memIters = tablet.getTabletMemory().getIterators(samplerConfig);
    Pair<Long, Map<TabletFile, DataFileValue>> reservation =
        tablet.getDatafileManager().reserveFilesForScan();
    fileReservationId = reservation.getFirst();
    files = reservation.getSecond();
  }
  Collection<InterruptibleIterator> mapfiles =
      fileManager.openFiles(files, scanParams.isIsolated(), samplerConfig);
  for (SortedKeyValueIterator<Key, Value> skvi : Iterables.concat(mapfiles, memIters))
    ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
  List<SortedKeyValueIterator<Key, Value>> iters =
      new ArrayList<>(mapfiles.size() + memIters.size());
  iters.addAll(mapfiles);
  iters.addAll(memIters);
  MultiIterator multiIter = new MultiIterator(iters, tablet.getExtent());
  TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(
      tablet.getTabletServer().getContext(), IteratorScope.scan, tablet.getTableConfiguration(),
      tablet.getExtent().tableId(), fileManager, files, scanParams.getAuthorizations(),
      samplerConfig, new ArrayList<>());
  statsIterator =
      new StatsIterator(multiIter, TabletServer.seekCount, tablet.getScannedCounter());
  SortedKeyValueIterator<Key, Value> visFilter = SystemIteratorUtil.setupSystemScanIterators(
      statsIterator, scanParams.getColumnSet(), scanParams.getAuthorizations(), defaultLabels,
      tablet.getTableConfiguration());
  if (loadIters) {
    List<IterInfo> iterInfos;
    Map<String, Map<String, String>> iterOpts;
    ParsedIteratorConfig pic =
        tablet.getTableConfiguration().getParsedIteratorConfig(IteratorScope.scan);
    if (scanParams.getSsiList().isEmpty() && scanParams.getSsio().isEmpty()) {
      // No scan time iterator options were set, so can just use the pre-parsed table iterator
      // options.
      iterInfos = pic.getIterInfo();
      iterOpts = pic.getOpts();
    } else {
      // Scan time iterator options were set, so need to merge those with pre-parsed table
      // iterator options.
      iterOpts = new HashMap<>(pic.getOpts().size() + scanParams.getSsio().size());
      iterInfos = new ArrayList<>(pic.getIterInfo().size() + scanParams.getSsiList().size());
      IterConfigUtil.mergeIteratorConfig(iterInfos, iterOpts, pic.getIterInfo(), pic.getOpts(),
          scanParams.getSsiList(), scanParams.getSsio());
    }
    String context;
    if (scanParams.getClassLoaderContext() != null) {
      log.trace("Loading iterators for scan with scan context: {}",
          scanParams.getClassLoaderContext());
      context = scanParams.getClassLoaderContext();
    } else {
      context = pic.getServiceEnv();
      if (context != null) {
        log.trace("Loading iterators for scan with table context: {}", context);
      } else {
        log.trace("Loading iterators for scan");
      }
    }
    IterLoad il = new IterLoad().iters(iterInfos).iterOpts(iterOpts).iterEnv(iterEnv)
        .useAccumuloClassLoader(true).context(context);
    return iterEnv.getTopLevelIterator(IterConfigUtil.loadIterators(visFilter, il));
  } else {
    return visFilter;
  }
}
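The locking discipline above generalizes: do the cheap reservation and state checks atomically under the tablet lock, then perform the slow file opening outside it so other scans are not blocked. A stripped-down sketch of that pattern, with all names illustrative rather than Accumulo API:

import java.util.ArrayList;
import java.util.List;

class ReservationPattern {
  private final Object lock = new Object();
  private boolean closed = false;
  private final List<String> files = new ArrayList<>(List.of("f1.rf", "f2.rf"));

  List<String> open() {
    List<String> reserved;
    synchronized (lock) {
      if (closed)
        throw new IllegalStateException("closed"); // state check atomic with reservation
      reserved = new ArrayList<>(files); // cheap bookkeeping under the lock
    }
    List<String> readers = new ArrayList<>();
    for (String f : reserved)
      readers.add("reader:" + f); // expensive I/O happens outside the lock
    return readers;
  }
}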
Use of org.apache.accumulo.core.metadata.TabletFile in project Accumulo by Apache.
The class MinorCompactionTask, method run: runs a minor compaction, tracing each phase (waiting for commits, logging the start event, compacting) in its own span.
@Override
public void run() {
  tablet.minorCompactionStarted();
  try {
    Span span = TraceUtil.startSpan(this.getClass(), "minorCompaction");
    try (Scope scope = span.makeCurrent()) {
      Span span2 = TraceUtil.startSpan(this.getClass(), "waitForCommits");
      try (Scope scope2 = span2.makeCurrent()) {
        synchronized (tablet) {
          commitSession.waitForCommitsToFinish();
        }
      } catch (Exception e) {
        TraceUtil.setException(span2, e, true);
        throw e;
      } finally {
        span2.end();
      }
      TabletFile newFile = null;
      TabletFile tmpFile = null;
      Span span3 = TraceUtil.startSpan(this.getClass(), "start");
      try (Scope scope3 = span3.makeCurrent()) {
        while (true) {
          try {
            if (newFile == null) {
              newFile = tablet.getNextMapFilename("F");
              tmpFile = new TabletFile(new Path(newFile.getPathStr() + "_tmp"));
            }
            /*
             * the purpose of the minor compaction start event is to keep track of the filename...
             * in the case where the metadata table write for the minor compaction finishes and
             * the process dies before writing the minor compaction finish event, then the start
             * event+filename in metadata table will prevent recovery of duplicate data... the
             * minor compaction start event could be written at any time before the metadata
             * write for the minor compaction
             */
            tablet.getTabletServer().minorCompactionStarted(commitSession,
                commitSession.getWALogSeq() + 1, newFile.getMetaInsert());
            break;
          } catch (IOException e) {
            // An IOException could have occurred while creating the new file
            if (newFile == null)
              log.warn("Failed to create new file for minor compaction {}", e.getMessage(), e);
            else
              log.warn("Failed to write to write ahead log {}", e.getMessage(), e);
          }
        }
      } catch (Exception e) {
        TraceUtil.setException(span3, e, true);
        throw e;
      } finally {
        span3.end();
      }
      Span span4 = TraceUtil.startSpan(this.getClass(), "compact");
      try (Scope scope4 = span4.makeCurrent()) {
        this.stats = tablet.minorCompact(tablet.getTabletMemory().getMinCMemTable(), tmpFile,
            newFile, queued, commitSession, flushId, mincReason);
      } catch (Exception e) {
        TraceUtil.setException(span4, e, true);
        throw e;
      } finally {
        span4.end();
      }
      span.setAttribute("extent", tablet.getExtent().toString());
      span.setAttribute("numEntries", Long.toString(this.stats.getNumEntries()));
      span.setAttribute("size", Long.toString(this.stats.getSize()));
    } catch (Exception e) {
      TraceUtil.setException(span, e, true);
      throw e;
    } finally {
      span.end();
    }
    if (tablet.needsSplit()) {
      tablet.getTabletServer().executeSplit(tablet);
    }
  } catch (Exception e) {
    log.error("Unknown error during minor compaction for extent: {}", tablet.getExtent(), e);
    throw e;
  } finally {
    tablet.minorCompactionComplete();
  }
}
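Each traced phase above follows the same span lifecycle. A minimal sketch of that pattern, assuming OpenTelemetry's Span/Scope types as wrapped by Accumulo's TraceUtil (doPhase is a hypothetical placeholder):

import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Scope;
import org.apache.accumulo.core.trace.TraceUtil;

Span span = TraceUtil.startSpan(getClass(), "phase");
try (Scope scope = span.makeCurrent()) {
  doPhase(); // work done while the scope is current is attributed to this span
} catch (Exception e) {
  TraceUtil.setException(span, e, true); // record the failure on the span before rethrowing
  throw e;
} finally {
  span.end(); // always end the span, success or failure
}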
Use of org.apache.accumulo.core.metadata.TabletFile in project Accumulo by Apache.
The class FileUtil, method findMidPoint: estimates the mid point key of a set of files from their indexes, for use when splitting a tablet.
/**
 * @param mapFiles
 *          - list of MapFiles to find the mid point key for
 *
 *          ISSUES: This method uses the index files to find the mid point. If the map files have
 *          different index intervals this method will not return an accurate mid point. Also, it
 *          would be tricky to use this method in conjunction with an in memory map because the
 *          indexing interval is unknown.
 */
public static SortedMap<Double, Key> findMidPoint(ServerContext context, String tabletDirectory,
    Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, double minSplit,
    boolean useIndex) throws IOException {
  Collection<TabletFile> origMapFiles = mapFiles;
  Path tmpDir = null;
  int maxToOpen =
      context.getConfiguration().getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
  ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());
  try {
    if (mapFiles.size() > maxToOpen) {
      if (!useIndex)
        throw new IOException(
            "Cannot find mid point using data files, too many " + mapFiles.size());
      tmpDir = createTmpDir(context, tabletDirectory);
      log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}",
          mapFiles.size(), endRow, prevEndRow, tmpDir);
      long t1 = System.currentTimeMillis();
      mapFiles = reduceFiles(context, context.getHadoopConf(), prevEndRow, endRow, mapFiles,
          maxToOpen, tmpDir, 0);
      long t2 = System.currentTimeMillis();
      log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow,
          String.format("%6.2f secs", (t2 - t1) / 1000.0));
    }
    if (prevEndRow == null)
      prevEndRow = new Text();
    long t1 = System.currentTimeMillis();
    long numKeys;
    numKeys = countIndexEntries(context, prevEndRow, endRow, mapFiles,
        tmpDir == null ? useIndex : false, readers);
    if (numKeys == 0) {
      if (useIndex) {
        log.warn("Failed to find mid point using indexes, falling back to"
            + " data files which is slower. No entries between {} and {} for {}", prevEndRow,
            endRow, mapFiles);
        // need to pass original map files, not possibly reduced indexes
        return findMidPoint(context, tabletDirectory, prevEndRow, endRow, origMapFiles, minSplit,
            false);
      }
      return Collections.emptySortedMap();
    }
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(readers);
    MultiIterator mmfi = new MultiIterator(iters, true);
    // skip the prevEndRow
    while (mmfi.hasTop() && mmfi.getTopKey().compareRow(prevEndRow) <= 0)
      mmfi.next();
    // read half of the keys in the index
    TreeMap<Double, Key> ret = new TreeMap<>();
    Key lastKey = null;
    long keysRead = 0;
    Key keyBeforeMidPoint = null;
    long keyBeforeMidPointPosition = 0;
    while (keysRead < numKeys / 2) {
      if (lastKey != null && !lastKey.equals(mmfi.getTopKey(), PartialKey.ROW)
          && (keysRead - 1) / (double) numKeys >= minSplit) {
        keyBeforeMidPoint = new Key(lastKey);
        keyBeforeMidPointPosition = keysRead - 1;
      }
      if (lastKey == null)
        lastKey = new Key();
      lastKey.set(mmfi.getTopKey());
      keysRead++;
      // consume minimum
      mmfi.next();
    }
    if (keyBeforeMidPoint != null)
      ret.put(keyBeforeMidPointPosition / (double) numKeys, keyBeforeMidPoint);
    long t2 = System.currentTimeMillis();
    log.debug(String.format("Found midPoint from indexes in %6.2f secs.%n", ((t2 - t1) / 1000.0)));
    ret.put(.5, mmfi.getTopKey());
    // sanity check
    for (Key key : ret.values()) {
      boolean inRange =
          (key.compareRow(prevEndRow) > 0 && (endRow == null || key.compareRow(endRow) < 1));
      if (!inRange) {
        throw new IOException("Found mid point is not in range " + key + " " + prevEndRow + " "
            + endRow + " " + mapFiles);
      }
    }
    return ret;
  } finally {
    cleanupIndexOp(tmpDir, context.getVolumeManager(), readers);
  }
}
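A hedged invocation sketch (the server context, tablet directory, rows, and file collection are assumptions; server-side, this method backs tablet split point selection):

// Ask for the midpoint using the file indexes, requiring candidate keys to sit
// at least 25% of the way into the key range (minSplit = 0.25).
SortedMap<Double, Key> mids = FileUtil.findMidPoint(context, tabletDirectory, prevEndRow,
    endRow, tabletFiles, 0.25, true);
Key splitPoint = mids.get(0.5); // approximate median key from the indexes
// Any entry with a smaller fraction is the last row boundary seen before the midpoint.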
Use of org.apache.accumulo.core.metadata.TabletFile in project Accumulo by Apache.
The class Gatherer, method getFilesGroupedByLocation: groups a table's files by the tablet server hosting (or most recently hosting) the tablets that use them.
/**
 * @param fileSelector
 *          only returns files that match this predicate
 * @return A map of the form: {@code map<tserver location, map<path, list<range>>>}. The ranges
 *         associated with a file represent the tablets that use the file.
 */
private Map<String, Map<TabletFile, List<TRowRange>>> getFilesGroupedByLocation(
    Predicate<TabletFile> fileSelector) {
  Iterable<TabletMetadata> tmi = TabletsMetadata.builder(ctx).forTable(tableId)
      .overlapping(startRow, endRow).fetch(FILES, LOCATION, LAST, PREV_ROW).build();
  // get a subset of files
  Map<TabletFile, List<TabletMetadata>> files = new HashMap<>();
  for (TabletMetadata tm : tmi) {
    for (TabletFile file : tm.getFiles()) {
      if (fileSelector.test(file)) {
        // TODO push this filtering to server side and possibly use batch scanner
        files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
      }
    }
  }
  // group by location, then file
  Map<String, Map<TabletFile, List<TRowRange>>> locations = new HashMap<>();
  List<String> tservers = null;
  for (Entry<TabletFile, List<TabletMetadata>> entry : files.entrySet()) {
    String location = entry.getValue().stream()
        .filter(tm -> tm.getLocation() != null) // filter out tablets w/o a current location
        .map(tm -> tm.getLocation().getHostPort()) // convert to host:port strings
        .min(String::compareTo) // find minimum host:port
        .orElse(entry.getValue().stream()
            .filter(tm -> tm.getLast() != null) // if no locations, fall back to last location
            .map(tm -> tm.getLast().getHostPort()) // convert to host:port strings
            .min(String::compareTo) // find minimum last location
            .orElse(null)); // or return null
    if (location == null) {
      if (tservers == null) {
        tservers = ctx.instanceOperations().getTabletServers();
        Collections.sort(tservers);
      }
      // When no location, the approach below will consistently choose the same tserver for the
      // same file (as long as the set of tservers is stable).
      int idx = Math.abs(Hashing.murmur3_32_fixed()
          .hashString(entry.getKey().getPathStr(), UTF_8).asInt()) % tservers.size();
      location = tservers.get(idx);
    }
    // merge contiguous ranges
    List<Range> merged = Range
        .mergeOverlapping(Lists.transform(entry.getValue(), tm -> tm.getExtent().toDataRange()));
    // clip ranges to the queried range
    List<TRowRange> ranges =
        merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());
    locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
  }
  return locations;
}
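The deterministic fallback for files with no known location is easy to reproduce in isolation. A small sketch using Guava's Hashing (the host:port strings and path are made up):

import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

List<String> tservers = new ArrayList<>(List.of("ts2:9997", "ts1:9997", "ts3:9997"));
Collections.sort(tservers); // stable order, so every caller hashes into the same list
String path = "hdfs://nn/accumulo/tables/1/default_tablet/F00001.rf";
int idx = Math.abs(Hashing.murmur3_32_fixed().hashString(path, StandardCharsets.UTF_8).asInt())
    % tservers.size();
String chosen = tservers.get(idx); // the same path always maps to the same tserver

One caveat carried over from the source: Math.abs(Integer.MIN_VALUE) is still negative, so the one hash value that produces Integer.MIN_VALUE would yield a negative index; the mapping is deterministic but not defensively clamped.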