use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class FileUtil method estimatePercentageLTE.
/**
 * Estimates what fraction of the counted index entries have a row less than or equal to {@code splitRow}.
 *
 * <p>
 * If more files are supplied than {@link Property#TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN} allows, they are first passed through {@code reduceFiles} using a
 * temporary directory. The entries are then counted with {@code countIndexEntries}, and the opened readers are merged and walked: everything with a row at
 * or before {@code prevEndRow} is skipped, and every following entry with a row at or before {@code splitRow} is counted.
 *
 * @param fs
 *          volume manager used to create the temporary directory, read the files and clean up
 * @param tabletDir
 *          tablet directory handed to {@code createTmpDir}
 * @param acuconf
 *          configuration supplying the maximum number of files to open at once
 * @param prevEndRow
 *          exclusive lower row bound; {@code null} is replaced by a newly constructed {@link Text}
 * @param endRow
 *          upper row bound, passed through to {@code reduceFiles} and {@code countIndexEntries}
 * @param mapFiles
 *          files to examine
 * @param splitRow
 *          row the estimate is computed for
 * @return {@code (numLte + 1) / (numKeys + 2)}, which is never exactly 0 or 1; {@code .5} when no entries were counted
 * @throws IOException
 *           propagated from the file operations
 * @throws RuntimeException
 *           if more entries were found at or before {@code splitRow} than were counted in total
 */
public static double estimatePercentageLTE(VolumeManager fs, String tabletDir, AccumuloConfiguration acuconf, Text prevEndRow, Text endRow,
    Collection<String> mapFiles, Text splitRow) throws IOException {
  Configuration conf = CachedConfiguration.getInstance();
  Path tmpDir = null;
  int maxToOpen = acuconf.getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
  ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());

  try {
    if (mapFiles.size() > maxToOpen) {
      // too many files to open simultaneously; reduce them into tmpDir first
      tmpDir = createTmpDir(acuconf, fs, tabletDir);

      log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}", mapFiles.size(), endRow, prevEndRow, tmpDir);

      long t1 = System.currentTimeMillis();
      mapFiles = reduceFiles(acuconf, conf, fs, prevEndRow, endRow, mapFiles, maxToOpen, tmpDir, 0);
      long t2 = System.currentTimeMillis();

      log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow, String.format("%6.2f secs", (t2 - t1) / 1000.0));
    }

    if (prevEndRow == null) {
      prevEndRow = new Text();
    }

    // the readers list is handed in here and merged below, so it is expected to be filled by this call
    long numKeys = countIndexEntries(acuconf, prevEndRow, endRow, mapFiles, true, conf, fs, readers);

    if (numKeys == 0) {
      // no entries were counted, so there is nothing to base an estimate on; just punt and return .5
      return .5;
    }

    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(readers);
    MultiIterator mmfi = new MultiIterator(iters, true);

    // skip the prevendrow
    while (mmfi.hasTop() && mmfi.getTopKey().compareRow(prevEndRow) <= 0) {
      mmfi.next();
    }

    // long, like numKeys: an int counter could wrap negative and slip past the sanity check below
    long numLte = 0;
    while (mmfi.hasTop() && mmfi.getTopKey().compareRow(splitRow) <= 0) {
      numLte++;
      mmfi.next();
    }

    if (numLte > numKeys) {
      // something went wrong
      throw new RuntimeException("numLte > numKeys " + numLte + " " + numKeys + " " + prevEndRow + " " + endRow + " " + splitRow + " " + mapFiles);
    }

    // do not want to return 0% or 100%, so add 1 and 2 below
    return (numLte + 1) / (double) (numKeys + 2);
  } finally {
    cleanupIndexOp(tmpDir, fs, readers);
  }
}
use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class FileUtil method findMidPoint.
/**
 * Finds candidate split keys for the given files by reading roughly half of the counted entries from a merged view of all files.
 *
 * <p>
 * The returned map always contains an entry at {@code .5} holding the key the merged iterator is positioned on after {@code numKeys / 2} keys were consumed.
 * It may also contain one more entry: the last key seen before a row change, recorded only once the fraction of keys read so far has reached
 * {@code minSplit}, stored under that key's position divided by the total count.
 *
 * <p>
 * When {@code useIndex} is true and no entries are counted, the method calls itself again with {@code useIndex} set to false and the original file list.
 *
 * @param fs
 *          volume manager used to create the temporary directory, read the files and clean up
 * @param tabletDirectory
 *          tablet directory handed to {@code createTmpDir}
 * @param acuConf
 *          configuration supplying {@link Property#TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN}
 * @param prevEndRow
 *          exclusive lower row bound; {@code null} is replaced by a newly constructed {@link Text}
 * @param endRow
 *          inclusive upper row bound used in the final sanity check; {@code null} disables the upper check
 * @param mapFiles
 *          - list MapFiles to find the mid point key
 * @param minSplit
 *          minimum fraction of keys that must have been read before a row boundary is recorded as an additional candidate
 * @param useIndex
 *          whether to count and read index entries rather than data
 * @return candidate split keys keyed by their estimated relative position
 * @throws IOException
 *           if there are too many files and {@code useIndex} is false, if no entries are found with {@code useIndex} false, or if a candidate key is
 *           outside the range (prevEndRow, endRow]
 *
 *           ISSUES : This method used the index files to find the mid point. If the map files have different index intervals this method will not return an
 *           accurate mid point. Also, it would be tricky to use this method in conjunction with an in memory map because the indexing interval is unknown.
 */
public static SortedMap<Double, Key> findMidPoint(VolumeManager fs, String tabletDirectory, AccumuloConfiguration acuConf, Text prevEndRow, Text endRow, Collection<String> mapFiles, double minSplit, boolean useIndex) throws IOException {
Configuration conf = CachedConfiguration.getInstance();
// remember the caller's file list; mapFiles may be replaced by the reduced files below
Collection<String> origMapFiles = mapFiles;
Path tmpDir = null;
int maxToOpen = acuConf.getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());
try {
if (mapFiles.size() > maxToOpen) {
// reduction is only attempted for indexes; with data files this is a hard failure
if (!useIndex)
throw new IOException("Cannot find mid point using data files, too many " + mapFiles.size());
tmpDir = createTmpDir(acuConf, fs, tabletDirectory);
log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}", mapFiles.size(), endRow, prevEndRow, tmpDir);
long t1 = System.currentTimeMillis();
mapFiles = reduceFiles(acuConf, conf, fs, prevEndRow, endRow, mapFiles, maxToOpen, tmpDir, 0);
long t2 = System.currentTimeMillis();
log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow, String.format("%6.2f secs", (t2 - t1) / 1000.0));
}
if (prevEndRow == null)
prevEndRow = new Text();
long t1 = System.currentTimeMillis();
long numKeys = 0;
// when files were reduced (tmpDir != null) the index flag is forced to false
// NOTE(review): presumably because the reduced files hold the index keys as ordinary data - confirm against reduceFiles
numKeys = countIndexEntries(acuConf, prevEndRow, endRow, mapFiles, tmpDir == null ? useIndex : false, conf, fs, readers);
if (numKeys == 0) {
if (useIndex) {
log.warn("Failed to find mid point using indexes, falling back to data files which is slower. No entries between {} and {} for {}", prevEndRow, endRow, mapFiles);
// need to pass original map files, not possibly reduced indexes
return findMidPoint(fs, tabletDirectory, acuConf, prevEndRow, endRow, origMapFiles, minSplit, false);
}
throw new IOException("Failed to find mid point, no entries between " + prevEndRow + " and " + endRow + " for " + mapFiles);
}
// merge all opened readers into one sorted view
List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(readers);
MultiIterator mmfi = new MultiIterator(iters, true);
// skip the prevendrow
while (mmfi.hasTop() && mmfi.getTopKey().compareRow(prevEndRow) <= 0) mmfi.next();
// read half of the keys in the index
TreeMap<Double, Key> ret = new TreeMap<>();
// lastKey is a single reusable Key that is overwritten each iteration via set()
Key lastKey = null;
long keysRead = 0;
Key keyBeforeMidPoint = null;
long keyBeforeMidPointPosition = 0;
// NOTE(review): the loop never checks mmfi.hasTop(); it relies on numKeys matching what the merged iterator yields - confirm
while (keysRead < numKeys / 2) {
// a row change between the previous key and the current top, at or beyond the minSplit fraction, marks a candidate
if (lastKey != null && !lastKey.equals(mmfi.getTopKey(), PartialKey.ROW) && (keysRead - 1) / (double) numKeys >= minSplit) {
// copy, because lastKey is overwritten below
keyBeforeMidPoint = new Key(lastKey);
keyBeforeMidPointPosition = keysRead - 1;
}
if (lastKey == null)
lastKey = new Key();
lastKey.set(mmfi.getTopKey());
keysRead++;
// consume minimum
mmfi.next();
}
if (keyBeforeMidPoint != null)
ret.put(keyBeforeMidPointPosition / (double) numKeys, keyBeforeMidPoint);
long t2 = System.currentTimeMillis();
// NOTE(review): this message says "from indexes" and is logged on the useIndex == false path as well
log.debug(String.format("Found midPoint from indexes in %6.2f secs.%n", ((t2 - t1) / 1000.0)));
// the key the iterator is positioned on after half of the keys were consumed
ret.put(.5, mmfi.getTopKey());
// sanity check
for (Key key : ret.values()) {
// in range means: row strictly after prevEndRow and, if endRow is given, at or before endRow
boolean inRange = (key.compareRow(prevEndRow) > 0 && (endRow == null || key.compareRow(endRow) < 1));
if (!inRange) {
throw new IOException("Found mid point is not in range " + key + " " + prevEndRow + " " + endRow + " " + mapFiles);
}
}
return ret;
} finally {
// runs on every exit path, including the recursive fallback return above
cleanupIndexOp(tmpDir, fs, readers);
}
}
use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class CombinerTest method test5.
/**
 * Aggregates across multiple data sets that each contain the exact same key with a different value (2, 3 and 4) and expects the SummingCombiner, reading
 * them through a MultiIterator, to expose that key once with the string-encoded value "9".
 */
@Test
public void test5() throws IOException {
  Encoder<Long> encoder = LongCombiner.STRING_ENCODER;

  // one separate sorted map, and therefore one separate source, per value
  long[] perSourceValues = {2L, 3L, 4L};
  List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(3);
  for (long value : perSourceValues) {
    TreeMap<Key, Value> data = new TreeMap<>();
    newKeyValue(data, 1, 1, 1, 1, false, value, encoder);
    sources.add(new SortedMapIterator(data));
  }

  IteratorSetting setting = new IteratorSetting(1, SummingCombiner.class);
  LongCombiner.setEncodingType(setting, StringEncoder.class);
  Combiner.setColumns(setting, Collections.singletonList(new IteratorSetting.Column("cf001")));

  Combiner combiner = new SummingCombiner();
  combiner.init(new MultiIterator(sources, true), setting.getOptions(), SCAN_IE);
  combiner.seek(new Range(), EMPTY_COL_FAMS, false);

  assertTrue(combiner.hasTop());
  assertEquals(newKey(1, 1, 1, 1), combiner.getTopKey());
  assertEquals("9", encoder.decode(combiner.getTopValue().get()).toString());
}
use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class IntersectingIteratorTest method test3.
/**
 * Feeds an IntersectingIterator from two iterator stacks, built with identical arguments and merged by a MultiIterator, then checks that every hit's column
 * qualifier is in {@code docs} and that the number of hits equals the size of {@code docs}.
 */
@Test
public void test3() throws IOException {
  columnFamilies = new Text[] {new Text("C"), new Text("E"), new Text("G"), new Text("H"), new Text("I"), new Text("J")};
  otherColumnFamilies = new Text[] {new Text("A"), new Text("B"), new Text("D"), new Text("F")};

  float hitRatio = 0.5f;

  // two stacks created with the same arguments, merged below
  ArrayList<SortedKeyValueIterator<Key, Value>> sourceIters = new ArrayList<>();
  for (int i = 0; i < 2; i++) {
    sourceIters.add(createIteratorStack(hitRatio, NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs));
  }

  IteratorSetting setting = new IteratorSetting(1, IntersectingIterator.class);
  IntersectingIterator.setColumnFamilies(setting, columnFamilies);

  IntersectingIterator iter = new IntersectingIterator();
  iter.init(new MultiIterator(sourceIters, false), setting.getOptions(), env);
  iter.seek(new Range(), EMPTY_COL_FAMS, false);

  int hitCount = 0;
  for (; iter.hasTop(); iter.next()) {
    hitCount++;
    assertTrue(docs.contains(iter.getTopKey().getColumnQualifier()));
  }

  assertTrue(hitCount == docs.size());
  cleanup();
}
Aggregations