Search in sources:

Example 1 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

the class GenerateSplits method getSplitsBySize.

/**
 * Computes split points by reading every key/value pair of the given files through a
 * {@link MultiIterator} and recording the current row whenever the bytes accumulated since the
 * previous split exceed {@code splitSize}.
 *
 * @param accumuloConf table configuration handed to each file reader
 * @param hadoopConf Hadoop configuration handed to each file reader
 * @param files files to scan
 * @param fs file system the files are opened on
 * @param splitSize number of key+value bytes after which a new split point is emitted
 * @param base64encode passed through to {@code encode} for each split row
 * @return the encoded split rows, sorted
 * @throws IOException if a file cannot be opened, read or closed
 */
private TreeSet<String> getSplitsBySize(AccumuloConfiguration accumuloConf, Configuration hadoopConf, List<Path> files, FileSystem fs, long splitSize, boolean base64encode) throws IOException {
    long currentSplitSize = 0;
    long totalSize = 0;
    TreeSet<String> splits = new TreeSet<>();
    // The same readers are tracked twice: once typed for closing, once typed for MultiIterator.
    List<FileSKVIterator> fileReaders = new ArrayList<>(files.size());
    List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(files.size());
    try {
        for (Path file : files) {
            FileSKVIterator reader = FileOperations.getInstance().newScanReaderBuilder().forFile(file.toString(), fs, hadoopConf, CryptoServiceFactory.newDefaultInstance()).withTableConfiguration(accumuloConf).overRange(new Range(), Set.of(), false).build();
            readers.add(reader);
            fileReaders.add(reader);
        }
        SortedKeyValueIterator<Key, Value> iterator = new MultiIterator(readers, false);
        iterator.seek(new Range(), Collections.emptySet(), false);
        while (iterator.hasTop()) {
            Key key = iterator.getTopKey();
            Value val = iterator.getTopValue();
            int size = key.getSize() + val.getSize();
            currentSplitSize += size;
            totalSize += size;
            if (currentSplitSize > splitSize) {
                splits.add(encode(base64encode, key.getRow()));
                // Start accumulating afresh for the next split.
                currentSplitSize = 0;
            }
            iterator.next();
        }
    } finally {
        // Attempt to close every reader; a failure on one must not leave the others open.
        IOException closeFailure = null;
        for (FileSKVIterator r : fileReaders) {
            try {
                r.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                } else {
                    closeFailure.addSuppressed(e);
                }
            }
        }
        if (closeFailure != null) {
            throw closeFailure;
        }
    }
    log.debug("Got {} splits with split size {} out of {} total bytes read across {} files", splits.size(), splitSize, totalSize, files.size());
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) Range(org.apache.accumulo.core.data.Range) TreeSet(java.util.TreeSet) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key)

Example 2 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

the class OfflineIterator method createIterator.

/**
 * Closes any readers left over from a previous call, opens a reader for each of the given files
 * and returns the top-level scan iterator layered over them.
 *
 * <p>
 * The file readers are combined in a {@link MultiIterator} bounded by {@code extent}, then wrapped
 * with the system scan iterators and the iterators configured for the scan scope.
 *
 * @param extent extent handed to the {@link MultiIterator}
 * @param absFiles files to open
 * @return the top-level iterator produced by the iterator environment
 * @throws SampleNotPresentException if the scanner requests a sampler configuration that differs
 *         from the table's, or a file has no sample for it
 * @throws IOException if closing an old reader or opening a file fails
 */
private SortedKeyValueIterator<Key, Value> createIterator(KeyExtent extent, Collection<StoredTabletFile> absFiles) throws TableNotFoundException, AccumuloException, IOException {
    // possible race condition here, if table is renamed
    String tableName = context.getTableName(tableId);
    AccumuloConfiguration acuTableConf = new ConfigurationCopy(context.tableOperations().getConfiguration(tableName));
    Configuration conf = context.getHadoopConf();
    // Release the readers opened by the previous invocation before opening new ones.
    for (SortedKeyValueIterator<Key, Value> reader : readers) {
        ((FileSKVIterator) reader).close();
    }
    readers.clear();
    SamplerConfiguration scannerSamplerConfig = options.getSamplerConfiguration();
    SamplerConfigurationImpl scannerSamplerConfigImpl = scannerSamplerConfig == null ? null : new SamplerConfigurationImpl(scannerSamplerConfig);
    SamplerConfigurationImpl samplerConfImpl = SamplerConfigurationImpl.newSamplerConfig(acuTableConf);
    if (scannerSamplerConfigImpl != null && !scannerSamplerConfigImpl.equals(samplerConfImpl)) {
        throw new SampleNotPresentException();
    }
    for (TabletFile file : absFiles) {
        FileSystem fs = VolumeConfiguration.fileSystemForPath(file.getPathStr(), conf);
        FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(file.getPathStr(), fs, conf, CryptoServiceFactory.newDefaultInstance()).withTableConfiguration(acuTableConf).build();
        if (scannerSamplerConfigImpl != null) {
            FileSKVIterator sampleReader = reader.getSample(scannerSamplerConfigImpl);
            if (sampleReader == null) {
                // The full-file reader is not tracked in readers yet, so it must be closed here
                // or it would never be released.
                SampleNotPresentException notPresent = new SampleNotPresentException();
                try {
                    reader.close();
                } catch (IOException e) {
                    notPresent.addSuppressed(e);
                }
                throw notPresent;
            }
            reader = sampleReader;
        }
        readers.add(reader);
    }
    MultiIterator multiIter = new MultiIterator(readers, extent);
    OfflineIteratorEnvironment iterEnv = new OfflineIteratorEnvironment(authorizations, acuTableConf, false, samplerConfImpl == null ? null : samplerConfImpl.toSamplerConfiguration());
    byte[] defaultSecurityLabel;
    ColumnVisibility cv = new ColumnVisibility(acuTableConf.get(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY));
    defaultSecurityLabel = cv.getExpression();
    SortedKeyValueIterator<Key, Value> visFilter = SystemIteratorUtil.setupSystemScanIterators(multiIter, new HashSet<>(options.fetchedColumns), authorizations, defaultSecurityLabel, acuTableConf);
    IterLoad iterLoad = IterConfigUtil.loadIterConf(IteratorScope.scan, options.serverSideIteratorList, options.serverSideIteratorOptions, acuTableConf);
    return iterEnv.getTopLevelIterator(IterConfigUtil.loadIterators(visFilter, iterLoad.iterEnv(iterEnv).useAccumuloClassLoader(false)));
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ConfigurationCopy(org.apache.accumulo.core.conf.ConfigurationCopy) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) VolumeConfiguration(org.apache.accumulo.core.volume.VolumeConfiguration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) SampleNotPresentException(org.apache.accumulo.core.client.SampleNotPresentException) IterLoad(org.apache.accumulo.core.conf.IterLoad) FileSystem(org.apache.hadoop.fs.FileSystem) KeyValue(org.apache.accumulo.core.data.KeyValue) Value(org.apache.accumulo.core.data.Value) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)

Example 3 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

the class WholeRowIteratorTest method testBug1.

/**
 * Seeks a WholeRowIterator over a source fenced at "row1", then re-seeks starting just after the
 * returned key and expects nothing further.
 */
@Test
public void testBug1() throws Exception {
    SortedMap<Key, Value> rowOneEntries = new TreeMap<>();
    pkv(rowOneEntries, "row1", "cf1", "cq1", "cv1", 5, "foo");
    pkv(rowOneEntries, "row1", "cf1", "cq2", "cv1", 6, "bar");

    SortedMap<Key, Value> rowTwoEntries = new TreeMap<>();
    pkv(rowTwoEntries, "row2", "cf1", "cq1", "cv1", 5, "foo");

    SortedMap<Key, Value> allEntries = new TreeMap<>();
    allEntries.putAll(rowOneEntries);
    allEntries.putAll(rowTwoEntries);

    // The source holds both rows but is fenced to everything up to and including "row1".
    Range fence = new Range(null, true, new Text("row1"), true);
    SortedMapIterator backing = new SortedMapIterator(allEntries);
    MultiIterator source = new MultiIterator(Collections.singletonList(backing), fence);
    WholeRowIterator wholeRows = new WholeRowIterator(source);

    Range scanRange = new Range(new Text("row1"), true, new Text("row2"), true);
    wholeRows.seek(scanRange, new ArrayList<>(), false);
    assertTrue(wholeRows.hasTop());
    assertEquals(rowOneEntries, WholeRowIterator.decodeRow(wholeRows.getTopKey(), wholeRows.getTopValue()));

    // simulate something continuing using the last key from the iterator
    // this is what client and server code will do
    scanRange = new Range(wholeRows.getTopKey(), false, scanRange.getEndKey(), scanRange.isEndKeyInclusive());
    wholeRows.seek(scanRange, new ArrayList<>(), false);
    assertFalse(wholeRows.hasTop());
}
Also used : MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) Value(org.apache.accumulo.core.data.Value) Text(org.apache.hadoop.io.Text) TreeMap(java.util.TreeMap) SortedMapIterator(org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) Test(org.junit.jupiter.api.Test)

Example 4 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

the class MultiIteratorTest method test4.

/**
 * A MultiIterator over a single empty source has no top entry, both before and after a seek.
 */
@Test
public void test4() throws IOException {
    // TEST empty input
    TreeMap<Key, Value> emptyEntries = new TreeMap<>();
    List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(1);
    SortedMapIterator emptySource = new SortedMapIterator(emptyEntries);
    sources.add(emptySource);

    MultiIterator merged = new MultiIterator(sources, true);
    assertFalse(merged.hasTop());

    merged.seek(newRange(0, 6), EMPTY_COL_FAMS, false);
    assertFalse(merged.hasTop());
}
Also used : MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) Value(org.apache.accumulo.core.data.Value) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) TreeMap(java.util.TreeMap) SortedMapIterator(org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator) Key(org.apache.accumulo.core.data.Key) Test(org.junit.jupiter.api.Test)

Example 5 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

the class MultiIteratorTest method test6.

/**
 * Seeks a MultiIterator over three entries ("1", "2", "3") with ranges that vary the start key,
 * the end key and the inclusiveness of each, checking which entries are returned.
 */
@Test
public void test6() throws IOException {
    // Test setting an end key
    TreeMap<Key, Value> entries = new TreeMap<>();
    newKeyValue(entries, 3, 0, false, "1");
    newKeyValue(entries, 4, 0, false, "2");
    newKeyValue(entries, 6, 0, false, "3");
    List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(1);
    sources.add(new SortedMapIterator(entries));
    MultiIterator merged = new MultiIterator(sources, true);

    // No start key, end key (5, 9) exclusive: "1" then "2".
    Range noStartKey = new Range(null, true, newKey(5, 9), false);
    merged.seek(noStartKey, EMPTY_COL_FAMS, false);
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(3, 0));
    assertEquals("1", merged.getTopValue().toString());
    merged.next();
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(4, 0));
    assertEquals("2", merged.getTopValue().toString());
    merged.next();
    assertFalse(merged.hasTop());

    // Start (4, 10) inclusive, end (5, 9) exclusive: only "2".
    Range endBeforeLast = new Range(newKey(4, 10), true, newKey(5, 9), false);
    merged.seek(endBeforeLast, EMPTY_COL_FAMS, false);
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(4, 0));
    assertEquals("2", merged.getTopValue().toString());
    merged.next();
    assertFalse(merged.hasTop());

    // Start (4, 10) inclusive, end (6, 0) exclusive: only "2".
    Range endAtLastExclusive = new Range(newKey(4, 10), true, newKey(6, 0), false);
    merged.seek(endAtLastExclusive, EMPTY_COL_FAMS, false);
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(4, 0));
    assertEquals("2", merged.getTopValue().toString());
    merged.next();
    assertFalse(merged.hasTop());

    // Start (4, 10) inclusive, end (6, 0) inclusive: "2" then "3".
    Range endAtLastInclusive = new Range(newKey(4, 10), true, newKey(6, 0), true);
    merged.seek(endAtLastInclusive, EMPTY_COL_FAMS, false);
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(4, 0));
    assertEquals("2", merged.getTopValue().toString());
    merged.next();
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(6, 0));
    assertEquals("3", merged.getTopValue().toString());
    merged.next();
    assertFalse(merged.hasTop());

    // Start (4, 0) inclusive, end (6, 0) exclusive: only "2".
    Range startInclusive = new Range(newKey(4, 0), true, newKey(6, 0), false);
    merged.seek(startInclusive, EMPTY_COL_FAMS, false);
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(4, 0));
    assertEquals("2", merged.getTopValue().toString());
    merged.next();
    assertFalse(merged.hasTop());

    // Start (4, 0) exclusive, end (6, 0) exclusive: nothing.
    Range bothExclusive = new Range(newKey(4, 0), false, newKey(6, 0), false);
    merged.seek(bothExclusive, EMPTY_COL_FAMS, false);
    assertFalse(merged.hasTop());

    // Start (4, 0) exclusive, end (6, 0) inclusive: only "3".
    Range startExclusiveEndInclusive = new Range(newKey(4, 0), false, newKey(6, 0), true);
    merged.seek(startExclusiveEndInclusive, EMPTY_COL_FAMS, false);
    assertTrue(merged.hasTop());
    assertEquals(merged.getTopKey(), newKey(6, 0));
    assertEquals("3", merged.getTopValue().toString());
    merged.next();
    assertFalse(merged.hasTop());
}
Also used : MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) Value(org.apache.accumulo.core.data.Value) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) TreeMap(java.util.TreeMap) SortedMapIterator(org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) Test(org.junit.jupiter.api.Test)

Aggregations

MultiIterator (org.apache.accumulo.core.iteratorsImpl.system.MultiIterator)20 Key (org.apache.accumulo.core.data.Key)19 Value (org.apache.accumulo.core.data.Value)18 ArrayList (java.util.ArrayList)17 SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator)17 Range (org.apache.accumulo.core.data.Range)11 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)8 SortedMapIterator (org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator)8 Text (org.apache.hadoop.io.Text)8 Test (org.junit.jupiter.api.Test)8 TreeMap (java.util.TreeMap)7 IOException (java.io.IOException)6 Path (org.apache.hadoop.fs.Path)6 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)4 IterLoad (org.apache.accumulo.core.conf.IterLoad)4 TreeSet (java.util.TreeSet)3 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)3 TabletFile (org.apache.accumulo.core.metadata.TabletFile)3 Parameter (com.beust.jcommander.Parameter)2 AutoService (com.google.auto.service.AutoService)2