Use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
The class MultiIteratorTest, method test7.
@Test
public void test7() throws IOException {
  // Test setting an endKey
  TreeMap<Key, Value> tm1 = new TreeMap<>();
  newKeyValue(tm1, 0, 3, false, "1");
  newKeyValue(tm1, 0, 2, false, "2");
  newKeyValue(tm1, 0, 1, false, "3");
  newKeyValue(tm1, 0, 0, false, "4");
  newKeyValue(tm1, 1, 2, false, "5");
  newKeyValue(tm1, 1, 1, false, "6");
  newKeyValue(tm1, 1, 0, false, "7");
  newKeyValue(tm1, 2, 1, false, "8");
  newKeyValue(tm1, 2, 0, false, "9");
  List<SortedKeyValueIterator<Key, Value>> skvil = new ArrayList<>(1);
  skvil.add(new SortedMapIterator(tm1));
  KeyExtent extent = new KeyExtent(TableId.of("tablename"), newRow(1), newRow(0));
  // the extent fences all seeks to rows (0, 1], so only row 1 ("5"-"7") is visible
  MultiIterator mi = new MultiIterator(skvil, extent);
  Range r1 = new Range((Text) null, (Text) null);
  mi.seek(r1, EMPTY_COL_FAMS, false);
  assertTrue(mi.hasTop());
  assertEquals("5", mi.getTopValue().toString());
  mi.next();
  assertTrue(mi.hasTop());
  assertEquals("6", mi.getTopValue().toString());
  mi.next();
  assertTrue(mi.hasTop());
  assertEquals("7", mi.getTopValue().toString());
  mi.next();
  assertFalse(mi.hasTop());
  Range r2 = new Range(newKey(0, 0), true, newKey(1, 1), true);
  mi.seek(r2, EMPTY_COL_FAMS, false);
  assertTrue(mi.hasTop());
  assertEquals("5", mi.getTopValue().toString());
  mi.next();
  assertTrue(mi.hasTop());
  assertEquals("6", mi.getTopValue().toString());
  mi.next();
  assertFalse(mi.hasTop());
  Range r3 = new Range(newKey(0, 0), false, newKey(1, 1), false);
  mi.seek(r3, EMPTY_COL_FAMS, false);
  assertTrue(mi.hasTop());
  assertEquals("5", mi.getTopValue().toString());
  mi.next();
  assertFalse(mi.hasTop());
  Range r4 = new Range(newKey(1, 2), true, newKey(1, 1), false);
  mi.seek(r4, EMPTY_COL_FAMS, false);
  assertTrue(mi.hasTop());
  assertEquals("5", mi.getTopValue().toString());
  mi.next();
  assertFalse(mi.hasTop());
  Range r5 = new Range(newKey(1, 2), false, newKey(1, 1), true);
  mi.seek(r5, EMPTY_COL_FAMS, false);
  assertTrue(mi.hasTop());
  assertEquals("6", mi.getTopValue().toString());
  mi.next();
  assertFalse(mi.hasTop());
  // r6 and r7 lie entirely outside the extent, so nothing comes back
  Range r6 = new Range(newKey(2, 1), true, newKey(2, 0), true);
  mi.seek(r6, EMPTY_COL_FAMS, false);
  assertFalse(mi.hasTop());
  Range r7 = new Range(newKey(0, 3), true, newKey(0, 1), true);
  mi.seek(r7, EMPTY_COL_FAMS, false);
  assertFalse(mi.hasTop());
}
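The KeyExtent given to the constructor acts as a fence: every range passed to seek() is intersected with (prevEndRow, endRow], here rows (0, 1], which is why only values "5" through "7" are ever reachable and why r6 and r7 return nothing. The other common construction, seen in the snippets below, merges several sorted sources under a Range fence. Here is a minimal, hypothetical sketch of that use; the class name and row data are invented, and the SortedMapIterator package is assumed to match MultiIterator's.

import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.TreeMap;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iteratorsImpl.system.MultiIterator;
import org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator;

public class MultiIteratorMergeSketch {
  public static void main(String[] args) throws IOException {
    TreeMap<Key, Value> a = new TreeMap<>();
    a.put(new Key("row1", "cf", "cq"), new Value("a".getBytes(UTF_8)));
    TreeMap<Key, Value> b = new TreeMap<>();
    b.put(new Key("row2", "cf", "cq"), new Value("b".getBytes(UTF_8)));
    List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>();
    sources.add(new SortedMapIterator(a));
    sources.add(new SortedMapIterator(b));
    // an infinite Range fence, so no keys are clipped
    MultiIterator merged = new MultiIterator(sources, new Range());
    merged.seek(new Range(), Collections.emptySet(), false);
    while (merged.hasTop()) { // visits row1, then row2, in sorted order
      System.out.println(merged.getTopKey() + " -> " + merged.getTopValue());
      merged.next();
    }
  }
}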
Use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
The class FileCompactor, method compactLocalityGroup.
private void compactLocalityGroup(String lgName, Set<ByteSequence> columnFamilies, boolean inclusive, FileSKVWriter mfw, CompactionStats majCStats) throws IOException, CompactionCanceledException {
  ArrayList<FileSKVIterator> readers = new ArrayList<>(filesToCompact.size());
  Span compactSpan = TraceUtil.startSpan(this.getClass(), "compact");
  try (Scope span = compactSpan.makeCurrent()) {
    long entriesCompacted = 0;
    List<SortedKeyValueIterator<Key, Value>> iters = openMapDataFiles(readers);
    if (env.getIteratorScope() == IteratorScope.minc) {
      iters.add(env.getMinCIterator());
    }
    CountingIterator citr = new CountingIterator(new MultiIterator(iters, extent.toDataRange()), entriesRead);
    SortedKeyValueIterator<Key, Value> delIter = DeletingIterator.wrap(citr, propagateDeletes, DeletingIterator.getBehavior(acuTableConf));
    ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
    SystemIteratorEnvironment iterEnv = env.createIteratorEnv(context, acuTableConf, getExtent().tableId());
    SortedKeyValueIterator<Key, Value> itr = iterEnv.getTopLevelIterator(IterConfigUtil.convertItersAndLoad(env.getIteratorScope(), cfsi, acuTableConf, iterators, iterEnv));
    itr.seek(extent.toDataRange(), columnFamilies, inclusive);
    if (inclusive) {
      mfw.startNewLocalityGroup(lgName, columnFamilies);
    } else {
      mfw.startDefaultLocalityGroup();
    }
    Span writeSpan = TraceUtil.startSpan(this.getClass(), "write");
    try (Scope write = writeSpan.makeCurrent()) {
      while (itr.hasTop() && env.isCompactionEnabled()) {
        mfw.append(itr.getTopKey(), itr.getTopValue());
        itr.next();
        entriesCompacted++;
        if (entriesCompacted % 1024 == 0) {
          // Periodically update stats; do not want to do this too often since it's volatile
          entriesWritten.addAndGet(1024);
        }
      }
      if (itr.hasTop() && !env.isCompactionEnabled()) {
        // cancel major compaction operation
        try {
          try {
            mfw.close();
          } catch (IOException e) {
            log.error("{}", e.getMessage(), e);
          }
          fs.deleteRecursively(outputFile.getPath());
        } catch (Exception e) {
          log.warn("Failed to delete canceled compaction output file {}", outputFile, e);
        }
        throw new CompactionCanceledException();
      }
    } finally {
      CompactionStats lgMajcStats = new CompactionStats(citr.getCount(), entriesCompacted);
      majCStats.add(lgMajcStats);
      writeSpan.end();
    }
  } catch (Exception e) {
    TraceUtil.setException(compactSpan, e, true);
    throw e;
  } finally {
    // close sequence files opened
    for (FileSKVIterator reader : readers) {
      try {
        reader.close();
      } catch (Exception e) {
        log.warn("Failed to close map file", e);
      }
    }
    compactSpan.end();
  }
}
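What matters here is the fixed order of the read stack beneath the table-configured iterators: MultiIterator merge-sorts the file readers (plus the in-memory iterator during minor compactions), CountingIterator tallies entries read, DeletingIterator applies or propagates delete markers depending on the compaction type, and ColumnFamilySkippingIterator adds efficient column-family seeking. A cut-down, hypothetical sketch of the same pattern over one in-memory source; the data map and extent parameters are placeholders, not FileCompactor fields:

// Hypothetical sketch of the compaction read stack over a single in-memory source.
static void scanLikeACompaction(TreeMap<Key, Value> data, KeyExtent extent) throws IOException {
  AtomicLong entriesRead = new AtomicLong();
  List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>();
  iters.add(new SortedMapIterator(data));
  // merge-sort the sources, clipped to the tablet's data range, counting entries read
  CountingIterator citr = new CountingIterator(new MultiIterator(iters, extent.toDataRange()), entriesRead);
  // propagateDeletes = false: delete markers are applied and dropped, as in a full compaction
  SortedKeyValueIterator<Key, Value> delIter = DeletingIterator.wrap(citr, false, DeletingIterator.Behavior.PROCESS);
  ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
  cfsi.seek(extent.toDataRange(), Collections.emptySet(), false);
  while (cfsi.hasTop()) {
    // each entry here is what the compaction would hand to the table iterators
    cfsi.next();
  }
}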
Use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
The class FileUtil, method estimatePercentageLTE.
public static double estimatePercentageLTE(ServerContext context, String tabletDir, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, Text splitRow) throws IOException {
  Path tmpDir = null;
  int maxToOpen = context.getConfiguration().getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
  ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());
  try {
    if (mapFiles.size() > maxToOpen) {
      tmpDir = createTmpDir(context, tabletDir);
      log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}", mapFiles.size(), endRow, prevEndRow, tmpDir);
      long t1 = System.currentTimeMillis();
      mapFiles = reduceFiles(context, context.getHadoopConf(), prevEndRow, endRow, mapFiles, maxToOpen, tmpDir, 0);
      long t2 = System.currentTimeMillis();
      log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow, String.format("%6.2f secs", (t2 - t1) / 1000.0));
    }
    if (prevEndRow == null)
      prevEndRow = new Text();
    long numKeys = countIndexEntries(context, prevEndRow, endRow, mapFiles, true, readers);
    if (numKeys == 0) {
      // not enough information in the index to answer the question, so instead of going to
      // the data just punt and return .5
      return .5;
    }
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(readers);
    MultiIterator mmfi = new MultiIterator(iters, true);
    // skip the prevEndRow
    while (mmfi.hasTop() && mmfi.getTopKey().compareRow(prevEndRow) <= 0) {
      mmfi.next();
    }
    int numLte = 0;
    while (mmfi.hasTop() && mmfi.getTopKey().compareRow(splitRow) <= 0) {
      numLte++;
      mmfi.next();
    }
    if (numLte > numKeys) {
      // something went wrong
      throw new RuntimeException("numLte > numKeys " + numLte + " " + numKeys + " " + prevEndRow + " " + endRow + " " + splitRow + " " + mapFiles);
    }
    // do not want to return 0% or 100%, so add 1 and 2 below
    return (numLte + 1) / (double) (numKeys + 2);
  } finally {
    cleanupIndexOp(tmpDir, context.getVolumeManager(), readers);
  }
}
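The return statement uses add-one smoothing so callers never see exactly 0% or 100%, even when the split row falls outside everything in the index. A worked example:

// With 10 index entries and none <= splitRow: (0 + 1) / (10 + 2) ≈ 0.083, not 0.0
// With 10 index entries and all <= splitRow: (10 + 1) / (10 + 2) ≈ 0.917, not 1.0
double none = (0 + 1) / (double) (10 + 2);
double all = (10 + 1) / (double) (10 + 2);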
Use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
The class FileUtil, method findMidPoint.
/**
 * @param mapFiles
 *          - list of MapFiles in which to find the mid point key
 *
 *          ISSUES: This method uses the index files to find the mid point. If the map files
 *          have different index intervals, this method will not return an accurate mid point.
 *          Also, it would be tricky to use this method in conjunction with an in-memory map
 *          because the indexing interval is unknown.
 */
public static SortedMap<Double, Key> findMidPoint(ServerContext context, String tabletDirectory, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, double minSplit, boolean useIndex) throws IOException {
  Collection<TabletFile> origMapFiles = mapFiles;
  Path tmpDir = null;
  int maxToOpen = context.getConfiguration().getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
  ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());
  try {
    if (mapFiles.size() > maxToOpen) {
      if (!useIndex)
        throw new IOException("Cannot find mid point using data files, too many " + mapFiles.size());
      tmpDir = createTmpDir(context, tabletDirectory);
      log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}", mapFiles.size(), endRow, prevEndRow, tmpDir);
      long t1 = System.currentTimeMillis();
      mapFiles = reduceFiles(context, context.getHadoopConf(), prevEndRow, endRow, mapFiles, maxToOpen, tmpDir, 0);
      long t2 = System.currentTimeMillis();
      log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow, String.format("%6.2f secs", (t2 - t1) / 1000.0));
    }
    if (prevEndRow == null)
      prevEndRow = new Text();
    long t1 = System.currentTimeMillis();
    long numKeys = countIndexEntries(context, prevEndRow, endRow, mapFiles, tmpDir == null ? useIndex : false, readers);
    if (numKeys == 0) {
      if (useIndex) {
        log.warn("Failed to find mid point using indexes, falling back to data files which is slower. No entries between {} and {} for {}", prevEndRow, endRow, mapFiles);
        // need to pass original map files, not possibly reduced indexes
        return findMidPoint(context, tabletDirectory, prevEndRow, endRow, origMapFiles, minSplit, false);
      }
      return Collections.emptySortedMap();
    }
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(readers);
    MultiIterator mmfi = new MultiIterator(iters, true);
    // skip the prevEndRow
    while (mmfi.hasTop() && mmfi.getTopKey().compareRow(prevEndRow) <= 0) {
      mmfi.next();
    }
    // read half of the keys in the index
    TreeMap<Double, Key> ret = new TreeMap<>();
    Key lastKey = null;
    long keysRead = 0;
    Key keyBeforeMidPoint = null;
    long keyBeforeMidPointPosition = 0;
    while (keysRead < numKeys / 2) {
      if (lastKey != null && !lastKey.equals(mmfi.getTopKey(), PartialKey.ROW) && (keysRead - 1) / (double) numKeys >= minSplit) {
        keyBeforeMidPoint = new Key(lastKey);
        keyBeforeMidPointPosition = keysRead - 1;
      }
      if (lastKey == null)
        lastKey = new Key();
      lastKey.set(mmfi.getTopKey());
      keysRead++;
      // consume minimum
      mmfi.next();
    }
    if (keyBeforeMidPoint != null)
      ret.put(keyBeforeMidPointPosition / (double) numKeys, keyBeforeMidPoint);
    long t2 = System.currentTimeMillis();
    log.debug(String.format("Found midPoint from indexes in %6.2f secs.%n", ((t2 - t1) / 1000.0)));
    ret.put(.5, mmfi.getTopKey());
    // sanity check
    for (Key key : ret.values()) {
      boolean inRange = (key.compareRow(prevEndRow) > 0 && (endRow == null || key.compareRow(endRow) < 1));
      if (!inRange) {
        throw new IOException("Found mid point is not in range " + key + " " + prevEndRow + " " + endRow + " " + mapFiles);
      }
    }
    return ret;
  } finally {
    cleanupIndexOp(tmpDir, context.getVolumeManager(), readers);
  }
}
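A hypothetical caller's view of the result: each map key is the fraction of index entries that sort at or before the mapped Key, the .5 entry is the estimated midpoint, and any earlier entry is the last row boundary before it that clears minSplit. The variables below are placeholders:

SortedMap<Double, Key> splits = FileUtil.findMidPoint(context, tabletDirectory, prevEndRow, endRow, mapFiles, 0.25, true);
Key mid = splits.get(0.5); // estimated midpoint of the tablet's index entries
for (Map.Entry<Double, Key> e : splits.entrySet()) {
  log.debug("~{}% of index entries sort at or before {}", Math.round(e.getKey() * 100), e.getValue());
}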
Use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
The class CollectTabletStats, method createScanIterator.
private static SortedKeyValueIterator<Key, Value> createScanIterator(KeyExtent ke, Collection<SortedKeyValueIterator<Key, Value>> mapfiles, Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, boolean useTableIterators, TableConfiguration conf) throws IOException {
  SortedMapIterator smi = new SortedMapIterator(new TreeMap<>());
  List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(mapfiles.size() + 1);
  iters.addAll(mapfiles);
  iters.add(smi);
  MultiIterator multiIter = new MultiIterator(iters, ke);
  SortedKeyValueIterator<Key, Value> delIter = DeletingIterator.wrap(multiIter, false, Behavior.PROCESS);
  ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
  SortedKeyValueIterator<Key, Value> colFilter = ColumnQualifierFilter.wrap(cfsi, columnSet);
  SortedKeyValueIterator<Key, Value> visFilter = VisibilityFilter.wrap(colFilter, authorizations, defaultLabels);
  if (useTableIterators) {
    IterLoad il = IterConfigUtil.loadIterConf(IteratorScope.scan, ssiList, ssio, conf);
    return IterConfigUtil.loadIterators(visFilter, il.useAccumuloClassLoader(true));
  }
  return visFilter;
}
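A hypothetical use of the returned stack, seeking it over the tablet's data range the way a scan would; every argument below is a placeholder:

SortedKeyValueIterator<Key, Value> scanIter = createScanIterator(ke, readers, auths, new byte[0], columns, ssiList, ssio, true, tableConf);
scanIter.seek(ke.toDataRange(), Collections.emptySet(), false);
long seen = 0;
while (scanIter.hasTop()) {
  seen++; // each entry has already passed delete handling and the column/visibility filters
  scanIter.next();
}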