use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
the class RFileScanner method iterator.
/**
 * Builds the iterator stack over every RFile source in {@code opts} and returns it as a
 * standard Java iterator.
 *
 * <p>One {@code RFile.Reader} is created per source. When a sampler configuration is set, each
 * reader is replaced by its sample reader. The readers are merged with a {@link MultiIterator}
 * (restricted to {@code opts.bounds} when present), optionally wrapped with the system scan
 * iterators, then wrapped with the configured scan-time iterators, and finally seeked to the
 * scanner's range (or {@code EMPTY_RANGE} when no range is set).
 *
 * @return an {@code IteratorAdapter} around the seeked iterator stack
 * @throws RuntimeException wrapping any {@link IOException} raised while building or seeking
 */
@Override
public Iterator<Entry<Key, Value>> iterator() {
  try {
    RFileSource[] rfileSources = opts.in.getSources();
    List<SortedKeyValueIterator<Key, Value>> fileIters = new ArrayList<>(rfileSources.length);
    CacheProvider caches = new BasicCacheProvider(indexCache, dataCache);

    int sourceIndex = 0;
    for (RFileSource src : rfileSources) {
      // TODO may have been a bug with multiple files and caching in older version...
      FSDataInputStream stream = (FSDataInputStream) src.getInputStream();
      CachableBuilder builder = new CachableBuilder()
          .input(stream, "source-" + sourceIndex)
          .length(src.getLength())
          .conf(opts.in.getConf())
          .cacheProvider(caches)
          .cryptoService(cryptoService);
      fileIters.add(new RFile.Reader(builder));
      sourceIndex++;
    }

    // Swap each full reader for its sample reader when sampling is configured.
    if (getSamplerConfiguration() != null) {
      int idx = 0;
      while (idx < fileIters.size()) {
        Reader fullReader = (Reader) fileIters.get(idx);
        fileIters.set(idx,
            fullReader.getSample(new SamplerConfigurationImpl(getSamplerConfiguration())));
        idx++;
      }
    }

    SortedKeyValueIterator<Key, Value> stack = opts.bounds == null
        ? new MultiIterator(fileIters, false)
        : new MultiIterator(fileIters, opts.bounds);

    Set<ByteSequence> seekFamilies = Collections.emptySet();
    if (opts.useSystemIterators) {
      SortedSet<Column> fetched = this.getFetchedColumns();
      seekFamilies = LocalityGroupUtil.families(fetched);
      stack = SystemIteratorUtil.setupSystemScanIterators(stack, fetched, getAuthorizations(),
          EMPTY_BYTES, tableConf);
    }

    // An IOException from iterator loading is wrapped by the single handler below, exactly as
    // a dedicated inner handler would wrap it.
    if (opts.tableConfig == null || opts.tableConfig.isEmpty()) {
      stack = IterConfigUtil.loadIterators(stack,
          new IterLoad().iters(serverSideIteratorList).iterOpts(serverSideIteratorOptions)
              .iterEnv(new IterEnv()).useAccumuloClassLoader(false));
    } else {
      IterLoad tableLoad = IterConfigUtil.loadIterConf(IteratorScope.scan,
          serverSideIteratorList, serverSideIteratorOptions, tableConf);
      stack = IterConfigUtil.loadIterators(stack,
          tableLoad.iterEnv(new IterEnv()).useAccumuloClassLoader(true));
    }

    stack.seek(getRange() == null ? EMPTY_RANGE : getRange(), seekFamilies,
        !seekFamilies.isEmpty());
    return new IteratorAdapter(stack);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
the class GenerateSplits method getSplitsFromFullScan.
/**
 * Computes split points by scanning the full contents of the given files.
 *
 * <p>A scan reader is opened for each file, the readers are merged through a
 * {@link MultiIterator}, and the merged iterator is seeked over a default-constructed
 * {@code Range} before being handed to {@code getQuantiles}. Each resulting split is passed
 * through {@code encode(base64encode, ...)} and collected into a sorted set.
 *
 * <p>Every opened reader is closed before returning, even if closing an earlier reader fails.
 * If the scan itself fails, that failure is propagated and close failures are attached to it
 * as suppressed exceptions; otherwise the first close failure is thrown with later ones
 * suppressed.
 *
 * @param accumuloConf table configuration handed to each reader builder
 * @param hadoopConf Hadoop configuration used to open the files
 * @param files files to scan
 * @param fs file system the files live on
 * @param numSplits number of splits requested from {@code getQuantiles}
 * @param base64encode flag forwarded to {@code encode} for each split
 * @return the encoded splits in sorted order
 * @throws IOException if reading the files or closing a reader fails
 */
private TreeSet<String> getSplitsFromFullScan(SiteConfiguration accumuloConf, Configuration hadoopConf, List<Path> files, FileSystem fs, int numSplits, boolean base64encode) throws IOException {
  Text[] splitArray;
  List<FileSKVIterator> fileReaders = new ArrayList<>(files.size());
  List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(files.size());
  SortedKeyValueIterator<Key, Value> iterator;
  Throwable scanFailure = null;
  IOException closeFailure = null;
  try {
    for (Path file : files) {
      FileSKVIterator reader = FileOperations.getInstance().newScanReaderBuilder().forFile(file.toString(), fs, hadoopConf, CryptoServiceFactory.newDefaultInstance()).withTableConfiguration(accumuloConf).overRange(new Range(), Set.of(), false).build();
      readers.add(reader);
      fileReaders.add(reader);
    }
    iterator = new MultiIterator(readers, false);
    iterator.seek(new Range(), Collections.emptySet(), false);
    splitArray = getQuantiles(iterator, numSplits);
  } catch (Throwable t) {
    // Remember the primary failure so close problems cannot replace it; rethrown unchanged.
    scanFailure = t;
    throw t;
  } finally {
    // Close every reader; one failing close must not leave the remaining readers open.
    for (var r : fileReaders) {
      try {
        r.close();
      } catch (IOException e) {
        if (scanFailure != null) {
          if (scanFailure != e) {
            scanFailure.addSuppressed(e);
          }
        } else if (closeFailure == null) {
          closeFailure = e;
        } else {
          closeFailure.addSuppressed(e);
        }
      }
    }
  }
  if (closeFailure != null) {
    throw closeFailure;
  }
  log.debug("Got {} splits from quantiles across {} files", splitArray.length, files.size());
  return Arrays.stream(splitArray).map(t -> encode(base64encode, t)).collect(toCollection(TreeSet::new));
}
use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
the class GenerateSplits method getIndexKeys.
/**
 * Scan the files for indexed keys first since it is more efficient than a full file scan.
 *
 * <p>An index reader is opened for each file, the readers are merged through a
 * {@link MultiIterator}, and the merged iterator is handed to {@code getQuantiles}. Each
 * resulting split is passed through {@code encode(base64encode, ...)} and collected into a
 * sorted set.
 *
 * <p>Every opened reader is closed before returning, even if closing an earlier reader fails.
 * If reading the indices fails, that failure is propagated and close failures are attached to
 * it as suppressed exceptions; otherwise the first close failure is thrown with later ones
 * suppressed.
 *
 * @param accumuloConf table configuration handed to each reader builder
 * @param hadoopConf Hadoop configuration used to open the files
 * @param fs file system the files live on
 * @param files files whose indices are read
 * @param requestedNumSplits number of splits requested from {@code getQuantiles}
 * @param base64encode flag forwarded to {@code encode} for each split
 * @return the encoded splits in sorted order
 * @throws IOException if reading the indices or closing a reader fails
 */
private TreeSet<String> getIndexKeys(AccumuloConfiguration accumuloConf, Configuration hadoopConf, FileSystem fs, List<Path> files, int requestedNumSplits, boolean base64encode) throws IOException {
  Text[] splitArray;
  List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(files.size());
  List<FileSKVIterator> fileReaders = new ArrayList<>(files.size());
  Throwable readFailure = null;
  IOException closeFailure = null;
  try {
    for (Path file : files) {
      FileSKVIterator reader = FileOperations.getInstance().newIndexReaderBuilder().forFile(file.toString(), fs, hadoopConf, CryptoServiceFactory.newDefaultInstance()).withTableConfiguration(accumuloConf).build();
      readers.add(reader);
      fileReaders.add(reader);
    }
    var iterator = new MultiIterator(readers, true);
    splitArray = getQuantiles(iterator, requestedNumSplits);
  } catch (Throwable t) {
    // Remember the primary failure so close problems cannot replace it; rethrown unchanged.
    readFailure = t;
    throw t;
  } finally {
    // Close every reader; one failing close must not leave the remaining readers open.
    for (var r : fileReaders) {
      try {
        r.close();
      } catch (IOException e) {
        if (readFailure != null) {
          if (readFailure != e) {
            readFailure.addSuppressed(e);
          }
        } else if (closeFailure == null) {
          closeFailure = e;
        } else {
          closeFailure.addSuppressed(e);
        }
      }
    }
  }
  if (closeFailure != null) {
    throw closeFailure;
  }
  log.debug("Got {} splits from indices of {}", splitArray.length, files);
  return Arrays.stream(splitArray).map(t -> encode(base64encode, t)).collect(toCollection(TreeSet::new));
}
use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
the class CombinerTest method test5.
/**
 * Aggregates across three separate data sets that each hold the exact same key with a
 * different value (2, 3 and 4) and expects the summing combiner to report a single entry
 * whose decoded value prints as "9".
 */
@Test
public void test5() throws IOException {
  Encoder<Long> encoder = LongCombiner.STRING_ENCODER;

  // One single-entry map per data set; all three share the same key.
  List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(3);
  for (long value = 2L; value <= 4L; value++) {
    TreeMap<Key, Value> dataSet = new TreeMap<>();
    newKeyValue(dataSet, 1, 1, 1, 1, false, value, encoder);
    sources.add(new SortedMapIterator(dataSet));
  }

  IteratorSetting setting = new IteratorSetting(1, SummingCombiner.class);
  LongCombiner.setEncodingType(setting, StringEncoder.class);
  Combiner.setColumns(setting,
      Collections.singletonList(new IteratorSetting.Column("cf001")));

  Combiner combiner = new SummingCombiner();
  MultiIterator merged = new MultiIterator(sources, true);
  combiner.init(merged, setting.getOptions(), SCAN_IE);
  combiner.seek(new Range(), EMPTY_COL_FAMS, false);

  assertTrue(combiner.hasTop());
  assertEquals(newKey(1, 1, 1, 1), combiner.getTopKey());
  assertEquals("9", encoder.decode(combiner.getTopValue().get()).toString());
}
use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.
the class IntersectingIteratorTest method test3.
/**
 * Runs an {@code IntersectingIterator} over a {@link MultiIterator} that merges two iterator
 * stacks created with identical parameters, and checks that every returned key's column
 * qualifier is contained in {@code docs} and that the number of hits equals
 * {@code docs.size()}.
 */
@Test
public void test3() throws IOException {
  columnFamilies = new Text[] {new Text("C"), new Text("E"), new Text("G"), new Text("H"),
      new Text("I"), new Text("J")};
  otherColumnFamilies =
      new Text[] {new Text("A"), new Text("B"), new Text("D"), new Text("F")};
  float hitRatio = 0.5f;
  SortedKeyValueIterator<Key, Value> source = createIteratorStack(hitRatio, NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
  SortedKeyValueIterator<Key, Value> source2 = createIteratorStack(hitRatio, NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
  ArrayList<SortedKeyValueIterator<Key, Value>> sourceIters = new ArrayList<>();
  sourceIters.add(source);
  sourceIters.add(source2);
  MultiIterator mi = new MultiIterator(sourceIters, false);
  IteratorSetting is = new IteratorSetting(1, IntersectingIterator.class);
  IntersectingIterator.setColumnFamilies(is, columnFamilies);
  IntersectingIterator iter = new IntersectingIterator();
  iter.init(mi, is.getOptions(), env);
  iter.seek(new Range(), EMPTY_COL_FAMS, false);
  int hitCount = 0;
  while (iter.hasTop()) {
    hitCount++;
    Key k = iter.getTopKey();
    assertTrue(docs.contains(k.getColumnQualifier()));
    iter.next();
  }
  // Expected value goes first so a failure message reports expected/actual the right way round.
  assertEquals(docs.size(), hitCount);
  cleanup();
}
Aggregations