Search in sources:

Example 6 with MultiIterator

use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.

Class OfflineIterator, method createIterator.

/**
 * Builds the scan iterator stack over the given files for one extent of an offline table.
 *
 * <p>Readers left over from a previous call are closed and forgotten first. A new reader is
 * then opened for each file (switched to its sample view when the scanner requested sampling),
 * merged through a {@link MultiIterator} bounded by {@code extent}, wrapped with the system
 * scan iterators, and finally with the configured scan-scope iterators.
 *
 * @param extent the key extent the merged iterator is restricted to
 * @param absFiles paths of the files to read
 * @return the top-level iterator produced by the iterator environment
 * @throws SampleNotPresentException if the scanner requests a sampler configuration that the
 *         table does not have, that differs from the table's, or for which a file has no sample
 * @throws IOException if closing an old reader or opening a new one fails
 */
private SortedKeyValueIterator<Key, Value> createIterator(KeyExtent extent, List<String> absFiles) throws TableNotFoundException, AccumuloException, IOException {
    // TODO share code w/ tablet - ACCUMULO-1303
    // possible race condition here, if table is renamed
    String tableName = Tables.getTableName(conn.getInstance(), tableId);
    AccumuloConfiguration acuTableConf = new ConfigurationCopy(conn.tableOperations().getProperties(tableName));
    Configuration conf = CachedConfiguration.getInstance();
    // Release the readers opened by the previous invocation before opening new ones.
    for (SortedKeyValueIterator<Key, Value> reader : readers) {
        ((FileSKVIterator) reader).close();
    }
    readers.clear();
    SamplerConfiguration scannerSamplerConfig = options.getSamplerConfiguration();
    SamplerConfigurationImpl scannerSamplerConfigImpl = scannerSamplerConfig == null ? null : new SamplerConfigurationImpl(scannerSamplerConfig);
    SamplerConfigurationImpl samplerConfImpl = SamplerConfigurationImpl.newSamplerConfig(acuTableConf);
    // Sampling was requested, but the table either has no sampler or a different one.
    if (scannerSamplerConfigImpl != null && (samplerConfImpl == null || !scannerSamplerConfigImpl.equals(samplerConfImpl))) {
        throw new SampleNotPresentException();
    }
    // TODO need to close files - ACCUMULO-1303
    for (String file : absFiles) {
        FileSystem fs = VolumeConfiguration.getVolume(file, conf, config).getFileSystem();
        FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(file, fs, conf).withTableConfiguration(acuTableConf).build();
        if (scannerSamplerConfigImpl != null) {
            FileSKVIterator sample = reader.getSample(scannerSamplerConfigImpl);
            if (sample == null) {
                // The base reader is not tracked in 'readers' yet, so it must be closed here
                // or it would leak once the exception propagates.
                SampleNotPresentException notPresent = new SampleNotPresentException();
                try {
                    reader.close();
                } catch (IOException e) {
                    notPresent.addSuppressed(e);
                }
                throw notPresent;
            }
            reader = sample;
        }
        readers.add(reader);
    }
    MultiIterator multiIter = new MultiIterator(readers, extent);
    OfflineIteratorEnvironment iterEnv = new OfflineIteratorEnvironment(authorizations, acuTableConf, false, samplerConfImpl == null ? null : samplerConfImpl.toSamplerConfiguration());
    ColumnVisibility cv = new ColumnVisibility(acuTableConf.get(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY));
    byte[] defaultSecurityLabel = cv.getExpression();
    SortedKeyValueIterator<Key, Value> visFilter = IteratorUtil.setupSystemScanIterators(multiIter, new HashSet<>(options.fetchedColumns), authorizations, defaultSecurityLabel);
    return iterEnv.getTopLevelIterator(IteratorUtil.loadIterators(IteratorScope.scan, visFilter, extent, acuTableConf, options.serverSideIteratorList, options.serverSideIteratorOptions, iterEnv, false));
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) VolumeConfiguration(org.apache.accumulo.core.volume.VolumeConfiguration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) SampleNotPresentException(org.apache.accumulo.core.client.SampleNotPresentException) FileSystem(org.apache.hadoop.fs.FileSystem) KeyValue(org.apache.accumulo.core.data.KeyValue) Value(org.apache.accumulo.core.data.Value) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)

Example 7 with MultiIterator

use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.

Class FileUtil, method reduceFiles.

/**
 * Repeatedly merges the keys of the given files into fewer files until at most
 * {@code maxFiles} remain.
 *
 * <p>Each pass splits the input into batches of up to {@code maxFiles} files, reads every batch
 * through index readers merged by a {@link MultiIterator}, and writes the keys whose row lies in
 * {@code (prevEndRow, endRow]} (with empty values) to a new file under
 * {@code tmpDir/pass_NNNN}. The method then recurses on the files it just wrote.
 *
 * @param acuConf table configuration handed to the readers and writers
 * @param conf Hadoop configuration, passed through to the recursive call
 * @param fs volume manager used to resolve the file system of each path
 * @param prevEndRow exclusive lower row bound, or {@code null} for no lower bound
 * @param endRow inclusive upper row bound, or {@code null} for no upper bound
 * @param mapFiles the files to reduce
 * @param maxFiles the largest number of files allowed in the result and in one merge batch
 * @param tmpDir directory under which the per-pass output directories are created
 * @param pass the current pass number, used to name the output directory
 * @return a collection of at most {@code maxFiles} file paths
 * @throws IllegalArgumentException if a reduction is required but {@code maxFiles} is below 2
 * @throws IOException if reading an input file or writing or closing an output file fails
 */
public static Collection<String> reduceFiles(AccumuloConfiguration acuConf, Configuration conf, VolumeManager fs, Text prevEndRow, Text endRow, Collection<String> mapFiles, int maxFiles, Path tmpDir, int pass) throws IOException {
    ArrayList<String> paths = new ArrayList<>(mapFiles);
    if (paths.size() <= maxFiles)
        return paths;
    // Batches of fewer than two files never shrink the file count: maxFiles <= 0 would spin in
    // the loop below and maxFiles == 1 would recurse without end.
    if (maxFiles < 2)
        throw new IllegalArgumentException("maxFiles must be at least 2 to reduce " + paths.size() + " files, got " + maxFiles);
    String newDir = String.format("%s/pass_%04d", tmpDir, pass);
    int start = 0;
    ArrayList<String> outFiles = new ArrayList<>();
    int count = 0;
    while (start < paths.size()) {
        int end = Math.min(maxFiles + start, paths.size());
        List<String> inFiles = paths.subList(start, end);
        start = end;
        String newMapFile = String.format("%s/%04d.%s", newDir, count++, RFile.EXTENSION);
        outFiles.add(newMapFile);
        FileSystem outFs = fs.getVolumeByPath(new Path(newMapFile)).getFileSystem();
        FileSKVWriter writer = new RFileOperations().newWriterBuilder().forFile(newMapFile, outFs, outFs.getConf()).withTableConfiguration(acuConf).build();
        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(inFiles.size());
        try {
            // Inside the try so the writer is still closed if starting the group fails.
            writer.startDefaultLocalityGroup();
            for (String s : inFiles) {
                FileSystem inFs = fs.getVolumeByPath(new Path(s)).getFileSystem();
                FileSKVIterator reader = FileOperations.getInstance().newIndexReaderBuilder().forFile(s, inFs, inFs.getConf()).withTableConfiguration(acuConf).build();
                iters.add(reader);
            }
            MultiIterator mmfi = new MultiIterator(iters, true);
            while (mmfi.hasTop()) {
                Key key = mmfi.getTopKey();
                boolean gtPrevEndRow = prevEndRow == null || key.compareRow(prevEndRow) > 0;
                boolean lteEndRow = endRow == null || key.compareRow(endRow) <= 0;
                if (gtPrevEndRow && lteEndRow)
                    writer.append(key, new Value(new byte[0]));
                // Once a key falls past endRow, nothing later in this batch is written.
                if (!lteEndRow)
                    break;
                mmfi.next();
            }
        } finally {
            // Every opened reader is in 'iters', so each one is closed exactly once here.
            for (SortedKeyValueIterator<Key, Value> r : iters) {
                try {
                    if (r != null)
                        ((FileSKVIterator) r).close();
                } catch (IOException e) {
                    // continue closing
                    log.error("{}", e.getMessage(), e);
                }
            }
            try {
                writer.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
                throw e;
            }
        }
    }
    return reduceFiles(acuConf, conf, fs, prevEndRow, endRow, outFiles, maxFiles, tmpDir, pass + 1);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 8 with MultiIterator

use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.

Class CollectTabletStats, method createScanIterator.

/**
 * Assembles a scan iterator stack over the supplied file iterators plus one empty in-memory
 * map iterator.
 *
 * <p>The sources are merged by a {@link MultiIterator} bounded by {@code ke}, then wrapped in
 * turn by a deleting iterator, a column-family skipping iterator, a column-qualifier filter and
 * a visibility filter. When {@code useTableIterators} is set, the scan-scope iterators are
 * loaded on top of that stack.
 *
 * @param ke extent the merged iterator is restricted to
 * @param mapfiles iterators over the files to scan
 * @param authorizations authorizations given to the visibility filter
 * @param defaultLabels default visibility expression given to the visibility filter
 * @param columnSet columns given to the column-qualifier filter
 * @param ssiList scan-time iterator descriptions, used only with {@code useTableIterators}
 * @param ssio scan-time iterator options, used only with {@code useTableIterators}
 * @param useTableIterators whether to load the scan-scope iterators on top of the stack
 * @param conf table configuration used when loading the scan-scope iterators
 * @return the top of the assembled iterator stack
 * @throws IOException if loading the scan-scope iterators fails
 */
private static SortedKeyValueIterator<Key, Value> createScanIterator(KeyExtent ke, Collection<SortedKeyValueIterator<Key, Value>> mapfiles, Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, boolean useTableIterators, TableConfiguration conf) throws IOException {
    SortedMapIterator emptyMemorySource = new SortedMapIterator(new TreeMap<>());
    List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(mapfiles.size() + 1);
    sources.addAll(mapfiles);
    // The empty in-memory source always goes last, after the file sources.
    sources.add(emptyMemorySource);

    MultiIterator merged = new MultiIterator(sources, ke);
    DeletingIterator withoutDeletes = new DeletingIterator(merged, false);
    ColumnFamilySkippingIterator familySkipping = new ColumnFamilySkippingIterator(withoutDeletes);
    SortedKeyValueIterator<Key, Value> columnFiltered = ColumnQualifierFilter.wrap(familySkipping, columnSet);
    SortedKeyValueIterator<Key, Value> visibilityFiltered = VisibilityFilter.wrap(columnFiltered, authorizations, defaultLabels);

    if (!useTableIterators) {
        return visibilityFiltered;
    }
    return IteratorUtil.loadIterators(IteratorScope.scan, visibilityFiltered, ke, conf, ssiList, ssio, null);
}
Also used : ColumnFamilySkippingIterator(org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) ArrayList(java.util.ArrayList) Value(org.apache.accumulo.core.data.Value) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) DeletingIterator(org.apache.accumulo.core.iterators.system.DeletingIterator) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Key(org.apache.accumulo.core.data.Key)

Example 9 with MultiIterator

use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.

Class RFileScanner, method iterator.

/**
 * Opens a reader for every configured RFile source, stacks the scan iterators on top of them,
 * seeks to the scanner's range and returns the result as a standard Java iterator.
 *
 * <p>When a sampler configuration is set, each reader is replaced by its sample view. The
 * system scan iterators are added only when {@code opts.useSystemIterators} is set; the
 * server-side iterators are always loaded, together with the table-configured ones when
 * {@code opts.tableConfig} is non-empty.
 *
 * @return an iterator over the entries in the scanner's range
 * @throws org.apache.accumulo.core.client.SampleNotPresentException if a sampler configuration
 *         is set and a source has no matching sample
 * @throws RuntimeException wrapping any {@link IOException} raised while opening, loading or
 *         seeking
 */
@Override
public Iterator<Entry<Key, Value>> iterator() {
    try {
        RFileSource[] sources = opts.in.getSources();
        List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(sources.length);
        for (int i = 0; i < sources.length; i++) {
            // TODO may have been a bug with multiple files and caching in older version...
            FSDataInputStream inputStream = (FSDataInputStream) sources[i].getInputStream();
            readers.add(new RFile.Reader(new CachableBlockFile.Reader("source-" + i, inputStream, sources[i].getLength(), opts.in.getConf(), dataCache, indexCache, DefaultConfiguration.getInstance())));
        }
        if (getSamplerConfiguration() != null) {
            for (int i = 0; i < readers.size(); i++) {
                SortedKeyValueIterator<Key, Value> sample = ((Reader) readers.get(i)).getSample(new SamplerConfigurationImpl(getSamplerConfiguration()));
                // A reader that cannot supply the requested sample yields null; fail fast
                // instead of handing a null source to MultiIterator.
                if (sample == null) {
                    throw new org.apache.accumulo.core.client.SampleNotPresentException();
                }
                readers.set(i, sample);
            }
        }
        SortedKeyValueIterator<Key, Value> iterator;
        if (opts.bounds != null) {
            iterator = new MultiIterator(readers, opts.bounds);
        } else {
            iterator = new MultiIterator(readers, false);
        }
        Set<ByteSequence> families = Collections.emptySet();
        if (opts.useSystemIterators) {
            SortedSet<Column> cols = this.getFetchedColumns();
            families = LocalityGroupUtil.families(cols);
            iterator = IteratorUtil.setupSystemScanIterators(iterator, cols, getAuthorizations(), EMPTY_BYTES);
        }
        // An IOException from loading is wrapped by the single catch below.
        if (opts.tableConfig != null && opts.tableConfig.size() > 0) {
            ConfigurationCopy conf = new ConfigurationCopy(opts.tableConfig);
            iterator = IteratorUtil.loadIterators(IteratorScope.scan, iterator, null, conf, serverSideIteratorList, serverSideIteratorOptions, new IterEnv());
        } else {
            iterator = IteratorUtil.loadIterators(iterator, serverSideIteratorList, serverSideIteratorOptions, new IterEnv(), false, null);
        }
        // Column families are passed as inclusive only when some were actually fetched.
        iterator.seek(getRange() == null ? EMPTY_RANGE : getRange(), families, !families.isEmpty());
        return new IteratorAdapter(iterator);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) IteratorAdapter(org.apache.accumulo.core.iterators.IteratorAdapter) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) RFile(org.apache.accumulo.core.file.rfile.RFile) IOException(java.io.IOException) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) Column(org.apache.accumulo.core.data.Column) Value(org.apache.accumulo.core.data.Value) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Key(org.apache.accumulo.core.data.Key) ByteSequence(org.apache.accumulo.core.data.ByteSequence)

Example 10 with MultiIterator

use of org.apache.accumulo.core.iterators.system.MultiIterator in project accumulo by apache.

Class AggregatingIteratorTest, method test5.

/**
 * Aggregates across three data sets that each hold the same single key with a different value
 * ("2", "3" and "4") and expects the summation aggregator on column family "cf001" to produce
 * one entry with the value "9".
 */
@SuppressWarnings("deprecation")
@Test
public void test5() throws IOException {
    // Three separate data sets containing the exact same key w/ different values.
    TreeMap<Key, Value> firstSet = new TreeMap<>();
    TreeMap<Key, Value> secondSet = new TreeMap<>();
    TreeMap<Key, Value> thirdSet = new TreeMap<>();
    newKeyValue(firstSet, 1, 1, 1, 1, false, "2");
    newKeyValue(secondSet, 1, 1, 1, 1, false, "3");
    newKeyValue(thirdSet, 1, 1, 1, 1, false, "4");

    // Merge the three sets into one sorted stream.
    List<SortedKeyValueIterator<Key, Value>> mergedInputs = new ArrayList<>(3);
    mergedInputs.add(new SortedMapIterator(firstSet));
    mergedInputs.add(new SortedMapIterator(secondSet));
    mergedInputs.add(new SortedMapIterator(thirdSet));
    MultiIterator merged = new MultiIterator(mergedInputs, true);

    // Sum the values of column family "cf001".
    Map<String, String> aggregatorOptions = new HashMap<>();
    aggregatorOptions.put("cf001", SummationAggregator.class.getName());
    AggregatingIterator aggregating = new AggregatingIterator();
    aggregating.init(merged, aggregatorOptions, null);
    aggregating.seek(new Range(), EMPTY_COL_FAMS, false);

    assertTrue(aggregating.hasTop());
    assertEquals(newKey(1, 1, 1, 1), aggregating.getTopKey());
    assertEquals("9", aggregating.getTopValue().toString());
}
Also used : MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) Range(org.apache.accumulo.core.data.Range) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator)14 Key (org.apache.accumulo.core.data.Key)13 Value (org.apache.accumulo.core.data.Value)12 ArrayList (java.util.ArrayList)11 SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator)10 Range (org.apache.accumulo.core.data.Range)6 Text (org.apache.hadoop.io.Text)6 TreeMap (java.util.TreeMap)5 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)5 IOException (java.io.IOException)4 SortedMapIterator (org.apache.accumulo.core.iterators.SortedMapIterator)4 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)3 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)3 PartialKey (org.apache.accumulo.core.data.PartialKey)3 SamplerConfigurationImpl (org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl)3 CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration)3 Configuration (org.apache.hadoop.conf.Configuration)3 Test (org.junit.Test)3 HashMap (java.util.HashMap)2 ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy)2