use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class OfflineIterator method createIterator.
/**
 * Builds the scan-time iterator stack over the given files of the offline table.
 *
 * <p>
 * Readers opened by a previous call (held in {@code readers}) are closed first. A new reader is
 * then opened for every entry of {@code absFiles}; when the scanner options carry a sampler
 * configuration each reader is replaced by its sample reader. The readers are merged with a
 * {@link MultiIterator} restricted to {@code extent}, wrapped with the system scan iterators and
 * finally with the scan-scope iterators from the table configuration and the scanner options.
 *
 * @param extent
 *          extent passed to the {@link MultiIterator} and to the iterator loader
 * @param absFiles
 *          files to open; each is resolved to its file system through {@code VolumeConfiguration}
 * @return the top level iterator produced by the iterator environment
 * @throws SampleNotPresentException
 *           if the scanner requests a sampler configuration that the table configuration does
 *           not match, or a file has no sample for the requested configuration
 */
private SortedKeyValueIterator<Key, Value> createIterator(KeyExtent extent, List<String> absFiles) throws TableNotFoundException, AccumuloException, IOException {
  // TODO share code w/ tablet - ACCUMULO-1303
  // possible race condition here, if table is renamed
  String tableName = Tables.getTableName(conn.getInstance(), tableId);
  AccumuloConfiguration acuTableConf = new ConfigurationCopy(conn.tableOperations().getProperties(tableName));
  Configuration conf = CachedConfiguration.getInstance();

  // Release the readers left over from the previous call before opening new ones.
  for (SortedKeyValueIterator<Key, Value> reader : readers) {
    ((FileSKVIterator) reader).close();
  }
  readers.clear();

  SamplerConfiguration scannerSamplerConfig = options.getSamplerConfiguration();
  SamplerConfigurationImpl scannerSamplerConfigImpl = scannerSamplerConfig == null ? null : new SamplerConfigurationImpl(scannerSamplerConfig);
  SamplerConfigurationImpl samplerConfImpl = SamplerConfigurationImpl.newSamplerConfig(acuTableConf);

  // A requested sampler configuration must be identical to the one configured on the table.
  if (scannerSamplerConfigImpl != null && (samplerConfImpl == null || !scannerSamplerConfigImpl.equals(samplerConfImpl))) {
    throw new SampleNotPresentException();
  }

  // TODO need to close files - ACCUMULO-1303
  for (String file : absFiles) {
    FileSystem fs = VolumeConfiguration.getVolume(file, conf, config).getFileSystem();
    FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(file, fs, conf).withTableConfiguration(acuTableConf).build();
    if (scannerSamplerConfigImpl != null) {
      FileSKVIterator sampleReader = reader.getSample(scannerSamplerConfigImpl);
      if (sampleReader == null) {
        // The file reader was just opened and is not tracked in 'readers' yet, so it has to be
        // closed here or it would leak. A failure to close must not hide the real problem.
        SampleNotPresentException missingSample = new SampleNotPresentException();
        try {
          reader.close();
        } catch (IOException e) {
          missingSample.addSuppressed(e);
        }
        throw missingSample;
      }
      reader = sampleReader;
    }
    readers.add(reader);
  }

  MultiIterator multiIter = new MultiIterator(readers, extent);
  OfflineIteratorEnvironment iterEnv = new OfflineIteratorEnvironment(authorizations, acuTableConf, false, samplerConfImpl == null ? null : samplerConfImpl.toSamplerConfiguration());

  // The table's default scan-time visibility is applied by the system scan iterators.
  ColumnVisibility cv = new ColumnVisibility(acuTableConf.get(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY));
  byte[] defaultSecurityLabel = cv.getExpression();

  SortedKeyValueIterator<Key, Value> visFilter = IteratorUtil.setupSystemScanIterators(multiIter, new HashSet<>(options.fetchedColumns), authorizations, defaultSecurityLabel);
  return iterEnv.getTopLevelIterator(IteratorUtil.loadIterators(IteratorScope.scan, visFilter, extent, acuTableConf, options.serverSideIteratorList, options.serverSideIteratorOptions, iterEnv, false));
}
use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class FileUtil method reduceFiles.
/**
 * Reduces a set of files to at most {@code maxFiles} files by repeatedly merging their index keys.
 *
 * <p>
 * If {@code mapFiles} already holds no more than {@code maxFiles} entries a copy of it is returned
 * unchanged. Otherwise the files are processed in groups of up to {@code maxFiles}: the index
 * readers of a group are merged and every key whose row lies in {@code (prevEndRow, endRow]} is
 * written, with an empty value, to a new file under {@code tmpDir/pass_NNNN}. The method then
 * recurses on the newly written files with {@code pass + 1}.
 *
 * @param acuConf
 *          table configuration handed to the reader and writer builders
 * @param conf
 *          Hadoop configuration, only forwarded to the recursive call
 * @param fs
 *          volume manager used to resolve the file system of each path
 * @param prevEndRow
 *          exclusive lower row bound, or {@code null} for no lower bound
 * @param endRow
 *          inclusive upper row bound, or {@code null} for no upper bound
 * @param mapFiles
 *          files to reduce
 * @param maxFiles
 *          maximum number of files merged at once and maximum size of the result
 * @param tmpDir
 *          directory under which the per-pass output directories are named
 * @param pass
 *          number of the current pass, used in the output directory name
 * @return the paths of at most {@code maxFiles} files
 * @throws IllegalArgumentException
 *           if a reduction is needed but {@code maxFiles} is smaller than 2, in which case the
 *           number of files could never shrink
 * @throws IOException
 *           if reading an input file or writing or closing an output file fails
 */
public static Collection<String> reduceFiles(AccumuloConfiguration acuConf, Configuration conf, VolumeManager fs, Text prevEndRow, Text endRow, Collection<String> mapFiles, int maxFiles, Path tmpDir, int pass) throws IOException {
  ArrayList<String> paths = new ArrayList<>(mapFiles);
  if (paths.size() <= maxFiles)
    return paths;

  // Groups of fewer than two files never reduce the file count, so the passes would never end.
  if (maxFiles < 2)
    throw new IllegalArgumentException("maxFiles must be at least 2 to reduce " + paths.size() + " files, but was " + maxFiles);

  String newDir = String.format("%s/pass_%04d", tmpDir, pass);
  ArrayList<String> outFiles = new ArrayList<>();
  int start = 0;
  int count = 0;

  while (start < paths.size()) {
    int end = Math.min(maxFiles + start, paths.size());
    List<String> inFiles = paths.subList(start, end);
    start = end;

    String newMapFile = String.format("%s/%04d.%s", newDir, count++, RFile.EXTENSION);
    outFiles.add(newMapFile);

    FileSystem outFs = fs.getVolumeByPath(new Path(newMapFile)).getFileSystem();
    FileSKVWriter writer = new RFileOperations().newWriterBuilder().forFile(newMapFile, outFs, outFs.getConf()).withTableConfiguration(acuConf).build();
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(inFiles.size());

    try {
      // Started inside the try so that the writer is still closed if this call fails.
      writer.startDefaultLocalityGroup();

      for (String s : inFiles) {
        FileSystem inFs = fs.getVolumeByPath(new Path(s)).getFileSystem();
        FileSKVIterator reader = FileOperations.getInstance().newIndexReaderBuilder().forFile(s, inFs, inFs.getConf()).withTableConfiguration(acuConf).build();
        iters.add(reader);
      }

      MultiIterator mmfi = new MultiIterator(iters, true);
      while (mmfi.hasTop()) {
        Key key = mmfi.getTopKey();
        boolean gtPrevEndRow = prevEndRow == null || key.compareRow(prevEndRow) > 0;
        boolean lteEndRow = endRow == null || key.compareRow(endRow) <= 0;
        if (gtPrevEndRow && lteEndRow)
          writer.append(key, new Value(new byte[0]));
        // Stop at the first key past endRow; nothing after it can be in range.
        if (!lteEndRow)
          break;
        mmfi.next();
      }
    } finally {
      // Every opened reader is in 'iters', so each one is closed exactly once here.
      for (SortedKeyValueIterator<Key, Value> r : iters) {
        try {
          if (r != null)
            ((FileSKVIterator) r).close();
        } catch (IOException e) {
          // continue closing
          log.error("{}", e.getMessage(), e);
        }
      }
      try {
        writer.close();
      } catch (IOException e) {
        log.error("{}", e.getMessage(), e);
        throw e;
      }
    }
  }

  return reduceFiles(acuConf, conf, fs, prevEndRow, endRow, outFiles, maxFiles, tmpDir, pass + 1);
}
use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class CollectTabletStats method createScanIterator.
/**
 * Assembles a scan iterator stack over the supplied map file iterators.
 *
 * <p>
 * The map file iterators, plus one iterator over an empty in-memory map, are merged by a
 * {@link MultiIterator} bounded by {@code ke}. On top of that sit, in order, a
 * {@link DeletingIterator}, a {@link ColumnFamilySkippingIterator}, the column qualifier filter
 * for {@code columnSet} and the visibility filter for {@code authorizations} and
 * {@code defaultLabels}. When {@code useTableIterators} is set, the scan-scope iterators described
 * by {@code conf}, {@code ssiList} and {@code ssio} are loaded over the visibility filter.
 *
 * @return the visibility filter, or the loaded scan-scope stack when {@code useTableIterators} is
 *         true
 */
private static SortedKeyValueIterator<Key, Value> createScanIterator(KeyExtent ke, Collection<SortedKeyValueIterator<Key, Value>> mapfiles, Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, boolean useTableIterators, TableConfiguration conf) throws IOException {
  // Sources: all map file iterators followed by an iterator over an empty map.
  List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(mapfiles.size() + 1);
  sources.addAll(mapfiles);
  sources.add(new SortedMapIterator(new TreeMap<>()));

  // merge -> delete handling (deletes not propagated) -> column family skipping
  ColumnFamilySkippingIterator familySkipper = new ColumnFamilySkippingIterator(new DeletingIterator(new MultiIterator(sources, ke), false));

  SortedKeyValueIterator<Key, Value> qualifierFiltered = ColumnQualifierFilter.wrap(familySkipper, columnSet);
  SortedKeyValueIterator<Key, Value> visibilityFiltered = VisibilityFilter.wrap(qualifierFiltered, authorizations, defaultLabels);

  if (!useTableIterators) {
    return visibilityFiltered;
  }
  return IteratorUtil.loadIterators(IteratorScope.scan, visibilityFiltered, ke, conf, ssiList, ssio, null);
}
use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class RFileScanner method iterator.
/**
 * Opens every configured RFile source and returns an iterator over the merged entries.
 *
 * <p>
 * Each source is wrapped in an {@code RFile.Reader}; when a sampler configuration is set every
 * reader is replaced by its sample reader. The readers are merged with a {@link MultiIterator}
 * (bounded by {@code opts.bounds} when present), optionally wrapped with the system scan
 * iterators, then with the configured scan iterators, and finally seeked to the scanner's range.
 * Any {@link IOException} raised along the way is rethrown wrapped in a {@link RuntimeException}.
 */
@Override
public Iterator<Entry<Key, Value>> iterator() {
  try {
    RFileSource[] sources = opts.in.getSources();
    List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(sources.length);
    for (int i = 0; i < sources.length; i++) {
      // TODO may have been a bug with multiple files and caching in older version...
      FSDataInputStream inputStream = (FSDataInputStream) sources[i].getInputStream();
      CachableBlockFile.Reader blockReader = new CachableBlockFile.Reader("source-" + i, inputStream, sources[i].getLength(), opts.in.getConf(), dataCache, indexCache, DefaultConfiguration.getInstance());
      readers.add(new RFile.Reader(blockReader));
    }

    if (getSamplerConfiguration() != null) {
      // NOTE(review): the result of getSample is stored unchecked - confirm it cannot be null here.
      for (int i = 0; i < readers.size(); i++) {
        Reader fullReader = (Reader) readers.get(i);
        SamplerConfigurationImpl samplerConf = new SamplerConfigurationImpl(getSamplerConfiguration());
        readers.set(i, fullReader.getSample(samplerConf));
      }
    }

    SortedKeyValueIterator<Key, Value> iterator = opts.bounds != null ? new MultiIterator(readers, opts.bounds) : new MultiIterator(readers, false);

    Set<ByteSequence> families = Collections.emptySet();
    if (opts.useSystemIterators) {
      SortedSet<Column> cols = this.getFetchedColumns();
      families = LocalityGroupUtil.families(cols);
      iterator = IteratorUtil.setupSystemScanIterators(iterator, cols, getAuthorizations(), EMPTY_BYTES);
    }

    // Without a table configuration only the scanner's own iterators are loaded.
    if (opts.tableConfig == null || opts.tableConfig.size() == 0) {
      iterator = IteratorUtil.loadIterators(iterator, serverSideIteratorList, serverSideIteratorOptions, new IterEnv(), false, null);
    } else {
      ConfigurationCopy conf = new ConfigurationCopy(opts.tableConfig);
      iterator = IteratorUtil.loadIterators(IteratorScope.scan, iterator, null, conf, serverSideIteratorList, serverSideIteratorOptions, new IterEnv());
    }

    // The column families are passed as inclusive only when there are any.
    iterator.seek(getRange() == null ? EMPTY_RANGE : getRange(), families, !families.isEmpty());
    return new IteratorAdapter(iterator);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.
the class AggregatingIteratorTest method test5.
/**
 * Aggregates across several data sets that all contain the exact same key with different values
 * and expects a single entry holding the sum of those values.
 */
@SuppressWarnings("deprecation")
@Test
public void test5() throws IOException {
  // three data sets, each holding the same key with the values 2, 3 and 4
  TreeMap<Key, Value> first = new TreeMap<>();
  newKeyValue(first, 1, 1, 1, 1, false, "2");
  TreeMap<Key, Value> second = new TreeMap<>();
  newKeyValue(second, 1, 1, 1, 1, false, "3");
  TreeMap<Key, Value> third = new TreeMap<>();
  newKeyValue(third, 1, 1, 1, 1, false, "4");

  List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(3);
  sources.add(new SortedMapIterator(first));
  sources.add(new SortedMapIterator(second));
  sources.add(new SortedMapIterator(third));

  // sum everything in column family cf001
  Map<String, String> opts = new HashMap<>();
  opts.put("cf001", SummationAggregator.class.getName());

  AggregatingIterator ai = new AggregatingIterator();
  ai.init(new MultiIterator(sources, true), opts, null);
  ai.seek(new Range(), EMPTY_COL_FAMS, false);

  // 2 + 3 + 4 collapses into one entry
  assertTrue(ai.hasTop());
  assertEquals(newKey(1, 1, 1, 1), ai.getTopKey());
  assertEquals("9", ai.getTopValue().toString());
}
Aggregations