Search in sources:

Example 6 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

From the class IndexedDocIteratorTest, method test3.

/**
 * Runs an IndexedDocIterator on top of a MultiIterator that merges two source stacks, and checks
 * that every returned entry refers to a document recorded in {@code docs} and that the number of
 * hits equals the number of recorded documents.
 *
 * @throws IOException if building the source stacks or driving the iterator fails
 */
@Test
public void test3() throws IOException {
    // Column families the iterator is configured with.
    columnFamilies = new Text[6];
    columnFamilies[0] = new Text("C");
    columnFamilies[1] = new Text("E");
    columnFamilies[2] = new Text("G");
    columnFamilies[3] = new Text("H");
    columnFamilies[4] = new Text("I");
    columnFamilies[5] = new Text("J");
    // Additional column families handed to createIteratorStack only.
    otherColumnFamilies = new Text[4];
    otherColumnFamilies[0] = new Text("A");
    otherColumnFamilies[1] = new Text("B");
    otherColumnFamilies[2] = new Text("D");
    otherColumnFamilies[3] = new Text("F");
    float hitRatio = 0.5f;
    // Both stacks share this set; presumably createIteratorStack records the expected doc IDs in
    // it -- confirm against that helper.
    HashSet<Text> docs = new HashSet<>();
    SortedKeyValueIterator<Key, Value> source = createIteratorStack(hitRatio, NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
    SortedKeyValueIterator<Key, Value> source2 = createIteratorStack(hitRatio, NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
    ArrayList<SortedKeyValueIterator<Key, Value>> sourceIters = new ArrayList<>();
    sourceIters.add(source);
    sourceIters.add(source2);
    MultiIterator mi = new MultiIterator(sourceIters, false);
    IteratorSetting is = new IteratorSetting(1, IndexedDocIterator.class);
    IndexedDocIterator.setColumnFamilies(is, columnFamilies);
    IndexedDocIterator.setColfs(is, indexColf.toString(), docColfPrefix);
    IndexedDocIterator iter = new IndexedDocIterator();
    iter.init(mi, is.getOptions(), env);
    iter.seek(new Range(), EMPTY_COL_FAMS, false);
    int hitCount = 0;
    while (iter.hasTop()) {
        hitCount++;
        Key k = iter.getTopKey();
        Value v = iter.getTopValue();
        Text d = IndexedDocIterator.parseDocID(k);
        assertTrue(docs.contains(d));
        // Decode with an explicit charset so the check does not depend on the platform default.
        assertTrue(new String(v.get(), java.nio.charset.StandardCharsets.UTF_8).endsWith(" docID=" + d));
        iter.next();
    }
    // JUnit's assertEquals takes (expected, actual): the recorded documents are the expectation.
    assertEquals(docs.size(), hitCount);
    cleanup();
}
Also used : MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) RFileTest(org.apache.accumulo.core.file.rfile.RFileTest) Test(org.junit.jupiter.api.Test)

Example 7 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

From the class WholeColumnFamilyIteratorTest, method testBug1.

/**
 * Seeks a WholeColumnFamilyIterator over a two-row data set whose MultiIterator source is
 * constructed with a range ending at "row1" (inclusive), checks that the first result decodes to
 * the "row1" entries, then re-seeks just past the returned key and expects no further results.
 */
@Test
public void testBug1() throws Exception {
    SortedMap<Key, Value> rowOneEntries = new TreeMap<>();
    pkv(rowOneEntries, "row1", "cf1", "cq1", "cv1", 5, "foo");
    pkv(rowOneEntries, "row1", "cf1", "cq2", "cv1", 6, "bar");

    SortedMap<Key, Value> rowTwoEntries = new TreeMap<>();
    pkv(rowTwoEntries, "row2", "cf1", "cq1", "cv1", 5, "foo");

    // The backing data holds both rows.
    SortedMap<Key, Value> allEntries = new TreeMap<>(rowOneEntries);
    allEntries.putAll(rowTwoEntries);

    Range sourceFence = new Range(null, true, new Text("row1"), true);
    MultiIterator source = new MultiIterator(Collections.singletonList(new SortedMapIterator(allEntries)), sourceFence);
    WholeColumnFamilyIterator iter = new WholeColumnFamilyIterator(source);

    Range firstSeek = new Range(new Text("row1"), true, new Text("row2"), true);
    iter.seek(firstSeek, new ArrayList<>(), false);
    assertTrue(iter.hasTop());
    assertEquals(rowOneEntries, WholeColumnFamilyIterator.decodeColumnFamily(iter.getTopKey(), iter.getTopValue()));

    // Mimic a client or server resuming the scan: restart exclusively after the last key that
    // was returned, keeping the original end bound.
    Range resumeSeek = new Range(iter.getTopKey(), false, firstSeek.getEndKey(), firstSeek.isEndKeyInclusive());
    iter.seek(resumeSeek, new ArrayList<>(), false);
    assertFalse(iter.hasTop());
}
Also used : MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) Value(org.apache.accumulo.core.data.Value) Text(org.apache.hadoop.io.Text) TreeMap(java.util.TreeMap) SortedMapIterator(org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) Test(org.junit.jupiter.api.Test)

Example 8 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

From the class MultiIteratorTest, method verify.

/**
 * Builds a MultiIterator over the given maps, seeks it, and walks it while asserting that each
 * key/value pair matches a counter that starts at {@code start} and must finish at {@code end}.
 *
 * @param start initial counter value
 * @param end counter value expected once the iterator is exhausted
 * @param seekKey start key for the final seek, or null to seek over an infinite range
 * @param endRow inclusive end row of the fencing range; may be null
 * @param prevEndRow exclusive start row of the fencing range; may be null
 * @param init passed to the MultiIterator constructor when no fence is used; otherwise controls
 *        whether the sources and the MultiIterator are pre-seeked to the fence
 * @param incrRow when true the counter increases and is used as the first newKey argument;
 *        otherwise it decreases and is used as the second
 * @param maps one sorted map per source iterator
 * @throws IOException if seeking or advancing an iterator fails
 */
void verify(int start, int end, Key seekKey, Text endRow, Text prevEndRow, boolean init, boolean incrRow, List<TreeMap<Key, Value>> maps) throws IOException {
    List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(maps.size());
    for (TreeMap<Key, Value> entries : maps) {
        sources.add(new SortedMapIterator(entries));
    }

    MultiIterator merged;
    boolean fenced = endRow != null || prevEndRow != null;
    if (!fenced) {
        merged = new MultiIterator(sources, init);
    } else {
        Range fence = new Range(prevEndRow, false, endRow, true);
        if (init) {
            for (SortedKeyValueIterator<Key, Value> src : sources) {
                src.seek(fence, Set.of(), false);
            }
        }
        merged = new MultiIterator(sources, fence);
        if (init) {
            merged.seek(fence, Set.of(), false);
        }
    }

    // Final positioning: from seekKey onwards if one was given, otherwise over everything.
    Range seekRange = (seekKey == null) ? new Range() : new Range(seekKey, null);
    merged.seek(seekRange, EMPTY_COL_FAMS, false);

    int step = incrRow ? 1 : -1;
    int i = start;
    while (merged.hasTop()) {
        if (incrRow) {
            assertEquals(newKey(i, 0), merged.getTopKey());
        } else {
            assertEquals(newKey(0, i), merged.getTopKey());
        }
        assertEquals("v" + i, merged.getTopValue().toString());
        merged.next();
        i += step;
    }
    assertEquals(end, i, "start=" + start + " end=" + end + " seekKey=" + seekKey + " endRow=" + endRow + " prevEndRow=" + prevEndRow + " init=" + init + " incrRow=" + incrRow + " maps=" + maps);
}
Also used : MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) Value(org.apache.accumulo.core.data.Value) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) SortedMapIterator(org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key)

Example 9 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

From the class FileUtil, method reduceFiles.

/**
 * Repeatedly merges groups of at most {@code maxFiles} input files into new index-only files
 * until no more than {@code maxFiles} files remain.
 *
 * <p>Each pass writes its output under {@code tmpDir/pass_NNNN}. For every group, the keys from
 * the inputs' index readers are merged, and keys whose row lies in
 * {@code (prevEndRow, endRow]} are appended with an empty value. The method then recurses on the
 * output files with {@code pass + 1}.
 *
 * @param context server context supplying configuration, volume manager and crypto service
 * @param conf not read by this method
 * @param prevEndRow exclusive lower row bound, or null for no lower bound
 * @param endRow inclusive upper row bound, or null for no upper bound
 * @param mapFiles files to reduce
 * @param maxFiles maximum number of files merged into one output file, and the target file count
 * @param tmpDir directory under which per-pass output directories are created
 * @param pass pass number, used in the output directory name
 * @return a copy of {@code mapFiles} if it already has at most {@code maxFiles} entries,
 *         otherwise the result of reducing this pass's output files
 * @throws IOException if reading an input fails or closing an output writer fails
 */
public static Collection<TabletFile> reduceFiles(ServerContext context, Configuration conf, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, int maxFiles, Path tmpDir, int pass) throws IOException {
    AccumuloConfiguration acuConf = context.getConfiguration();
    ArrayList<TabletFile> paths = new ArrayList<>(mapFiles);
    if (paths.size() <= maxFiles) {
        return paths;
    }
    String newDir = String.format("%s/pass_%04d", tmpDir, pass);
    int start = 0;
    ArrayList<TabletFile> outFiles = new ArrayList<>();
    int count = 0;
    while (start < paths.size()) {
        int end = Math.min(maxFiles + start, paths.size());
        List<TabletFile> inFiles = paths.subList(start, end);
        start = end;
        TabletFile newMapFile = new TabletFile(new Path(String.format("%s/%04d.%s", newDir, count++, RFile.EXTENSION)));
        outFiles.add(newMapFile);
        FileSystem ns = context.getVolumeManager().getFileSystemByPath(newMapFile.getPath());
        FileSKVWriter writer = new RFileOperations().newWriterBuilder().forFile(newMapFile.getPathStr(), ns, ns.getConf(), context.getCryptoService()).withTableConfiguration(acuConf).build();
        writer.startDefaultLocalityGroup();
        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(inFiles.size());
        try {
            for (TabletFile file : inFiles) {
                ns = context.getVolumeManager().getFileSystemByPath(file.getPath());
                // Every successfully opened reader is tracked in iters, which is the single
                // place readers are closed from; a failed build() leaves nothing untracked.
                FileSKVIterator reader = FileOperations.getInstance().newIndexReaderBuilder().forFile(file.getPathStr(), ns, ns.getConf(), context.getCryptoService()).withTableConfiguration(acuConf).build();
                iters.add(reader);
            }
            MultiIterator mmfi = new MultiIterator(iters, true);
            while (mmfi.hasTop()) {
                Key key = mmfi.getTopKey();
                boolean gtPrevEndRow = prevEndRow == null || key.compareRow(prevEndRow) > 0;
                boolean lteEndRow = endRow == null || key.compareRow(endRow) <= 0;
                if (gtPrevEndRow && lteEndRow) {
                    writer.append(key, new Value());
                }
                // Stop at the first key whose row is past endRow.
                if (!lteEndRow) {
                    break;
                }
                mmfi.next();
            }
        } finally {
            // Close each reader exactly once; keep going if one close fails.
            for (SortedKeyValueIterator<Key, Value> r : iters) {
                try {
                    if (r != null) {
                        ((FileSKVIterator) r).close();
                    }
                } catch (IOException e) {
                    // continue closing
                    log.error("{}", e.getMessage(), e);
                }
            }
            try {
                writer.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
                throw e;
            }
        }
    }
    return reduceFiles(context, conf, prevEndRow, endRow, outFiles, maxFiles, tmpDir, pass + 1);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) TabletFile(org.apache.accumulo.core.metadata.TabletFile) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)

Example 10 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

From the class ScanDataSource, method createIterator.

/**
 * Builds the iterator stack for a scan of this tablet.
 *
 * <p>While holding the tablet lock it validates state, acquires a scan file manager if none is
 * held, and reserves the in-memory iterators and data files. It then opens the files, merges file
 * and memory iterators in a MultiIterator constructed with the tablet extent, wraps that in a
 * StatsIterator and the system scan iterators, and, when {@code loadIters} is set, loads the
 * configured table and scan-time iterators on top.
 *
 * @return the top-level iterator for the scan
 * @throws IOException declared for the file and iterator operations performed here
 * @throws IllegalStateException if memory iterators or files from a previous call were not
 *         released
 * @throws TabletClosedException if the tablet is closed
 * @throws IterationInterruptedException if the interrupt flag is already set
 */
private SortedKeyValueIterator<Key, Value> createIterator() throws IOException {
    Map<TabletFile, DataFileValue> files;
    SamplerConfigurationImpl samplerConfig = scanParams.getSamplerConfigurationImpl();
    synchronized (tablet) {
        if (memIters != null)
            throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");
        if (tablet.isClosed())
            throw new TabletClosedException();
        if (interruptFlag.get())
            throw new IterationInterruptedException(tablet.getExtent() + " " + interruptFlag.hashCode());
        // only acquire the file manager when we know the tablet is open
        if (fileManager == null) {
            fileManager = tablet.getTabletResources().newScanFileManager(scanParams.getScanDispatch());
            tablet.getTabletServer().getScanMetrics().incrementOpenFiles(fileManager.getNumOpenFiles());
            tablet.addActiveScans(this);
        }
        if (fileManager.getNumOpenFiles() != 0)
            throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");
        // set this before trying to get iterators in case
        // getIterators() throws an exception
        expectedDeletionCount = tablet.getDataSourceDeletions();
        memIters = tablet.getTabletMemory().getIterators(samplerConfig);
        Pair<Long, Map<TabletFile, DataFileValue>> reservation = tablet.getDatafileManager().reserveFilesForScan();
        fileReservationId = reservation.getFirst();
        files = reservation.getSecond();
    }
    // Files are opened after the synchronized block above has been left.
    Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, scanParams.isIsolated(), samplerConfig);
    // Every file and memory source shares this scan's interrupt flag.
    for (SortedKeyValueIterator<Key, Value> skvi : Iterables.concat(mapfiles, memIters)) ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(mapfiles.size() + memIters.size());
    iters.addAll(mapfiles);
    iters.addAll(memIters);
    MultiIterator multiIter = new MultiIterator(iters, tablet.getExtent());
    TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(tablet.getTabletServer().getContext(), IteratorScope.scan, tablet.getTableConfiguration(), tablet.getExtent().tableId(), fileManager, files, scanParams.getAuthorizations(), samplerConfig, new ArrayList<>());
    statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, tablet.getScannedCounter());
    SortedKeyValueIterator<Key, Value> visFilter = SystemIteratorUtil.setupSystemScanIterators(statsIterator, scanParams.getColumnSet(), scanParams.getAuthorizations(), defaultLabels, tablet.getTableConfiguration());
    if (loadIters) {
        List<IterInfo> iterInfos;
        Map<String, Map<String, String>> iterOpts;
        ParsedIteratorConfig pic = tablet.getTableConfiguration().getParsedIteratorConfig(IteratorScope.scan);
        if (scanParams.getSsiList().isEmpty() && scanParams.getSsio().isEmpty()) {
            // No scan time iterator options were set, so can just use the pre-parsed table iterator
            // options.
            iterInfos = pic.getIterInfo();
            iterOpts = pic.getOpts();
        } else {
            // Scan time iterator options were set, so need to merge those with pre-parsed table
            // iterator options.
            iterOpts = new HashMap<>(pic.getOpts().size() + scanParams.getSsio().size());
            iterInfos = new ArrayList<>(pic.getIterInfo().size() + scanParams.getSsiList().size());
            IterConfigUtil.mergeIteratorConfig(iterInfos, iterOpts, pic.getIterInfo(), pic.getOpts(), scanParams.getSsiList(), scanParams.getSsio());
        }
        // A context supplied with the scan takes precedence over the one from the table config.
        String context;
        if (scanParams.getClassLoaderContext() != null) {
            log.trace("Loading iterators for scan with scan context: {}", scanParams.getClassLoaderContext());
            context = scanParams.getClassLoaderContext();
        } else {
            context = pic.getServiceEnv();
            if (context != null) {
                // Log the table context actually in use; the scan context is null on this branch.
                log.trace("Loading iterators for scan with table context: {}", context);
            } else {
                log.trace("Loading iterators for scan");
            }
        }
        IterLoad il = new IterLoad().iters(iterInfos).iterOpts(iterOpts).iterEnv(iterEnv).useAccumuloClassLoader(true).context(context);
        return iterEnv.getTopLevelIterator(IterConfigUtil.loadIterators(visFilter, il));
    } else {
        return visFilter;
    }
}
Also used : SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) ArrayList(java.util.ArrayList) InterruptibleIterator(org.apache.accumulo.core.iteratorsImpl.system.InterruptibleIterator) IterInfo(org.apache.accumulo.core.dataImpl.thrift.IterInfo) TabletIteratorEnvironment(org.apache.accumulo.server.iterators.TabletIteratorEnvironment) IterationInterruptedException(org.apache.accumulo.core.iteratorsImpl.system.IterationInterruptedException) TabletFile(org.apache.accumulo.core.metadata.TabletFile) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) StatsIterator(org.apache.accumulo.core.iteratorsImpl.system.StatsIterator) IterLoad(org.apache.accumulo.core.conf.IterLoad) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Value(org.apache.accumulo.core.data.Value) ParsedIteratorConfig(org.apache.accumulo.server.conf.TableConfiguration.ParsedIteratorConfig) HashMap(java.util.HashMap) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)

Aggregations

MultiIterator (org.apache.accumulo.core.iteratorsImpl.system.MultiIterator)20 Key (org.apache.accumulo.core.data.Key)19 Value (org.apache.accumulo.core.data.Value)18 ArrayList (java.util.ArrayList)17 SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator)17 Range (org.apache.accumulo.core.data.Range)11 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)8 SortedMapIterator (org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator)8 Text (org.apache.hadoop.io.Text)8 Test (org.junit.jupiter.api.Test)8 TreeMap (java.util.TreeMap)7 IOException (java.io.IOException)6 Path (org.apache.hadoop.fs.Path)6 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)4 IterLoad (org.apache.accumulo.core.conf.IterLoad)4 TreeSet (java.util.TreeSet)3 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)3 TabletFile (org.apache.accumulo.core.metadata.TabletFile)3 Parameter (com.beust.jcommander.Parameter)2 AutoService (com.google.auto.service.AutoService)2