Search in sources:

Example 16 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

The class MultiIteratorTest, method test7.

/**
 * Checks how a MultiIterator constructed with a KeyExtent responds to a series of seeks: only
 * the entries added with first argument 1 (values "5", "6" and "7") are ever returned, and each
 * requested range narrows that set further.
 */
@Test
public void test7() throws IOException {
    // Fixture: nine entries; the trailing string is what getTopValue() is later compared to.
    TreeMap<Key, Value> data = new TreeMap<>();
    newKeyValue(data, 0, 3, false, "1");
    newKeyValue(data, 0, 2, false, "2");
    newKeyValue(data, 0, 1, false, "3");
    newKeyValue(data, 0, 0, false, "4");
    newKeyValue(data, 1, 2, false, "5");
    newKeyValue(data, 1, 1, false, "6");
    newKeyValue(data, 1, 0, false, "7");
    newKeyValue(data, 2, 1, false, "8");
    newKeyValue(data, 2, 0, false, "9");

    List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(1);
    sources.add(new SortedMapIterator(data));

    // NOTE(review): presumably newRow(1) is the extent's end row and newRow(0) its previous end
    // row - confirm against the KeyExtent constructor.
    KeyExtent bounds = new KeyExtent(TableId.of("tablename"), newRow(1), newRow(0));
    MultiIterator iter = new MultiIterator(sources, bounds);

    // Unbounded range: the extent alone limits the output to "5", "6", "7".
    iter.seek(new Range((Text) null, (Text) null), EMPTY_COL_FAMS, false);
    assertTrue(iter.hasTop());
    assertEquals("5", iter.getTopValue().toString());
    iter.next();
    assertTrue(iter.hasTop());
    assertEquals("6", iter.getTopValue().toString());
    iter.next();
    assertTrue(iter.hasTop());
    assertEquals("7", iter.getTopValue().toString());
    iter.next();
    assertFalse(iter.hasTop());

    // Range starting before the extent and ending at key (1, 1), both bounds inclusive.
    iter.seek(new Range(newKey(0, 0), true, newKey(1, 1), true), EMPTY_COL_FAMS, false);
    assertTrue(iter.hasTop());
    assertEquals("5", iter.getTopValue().toString());
    iter.next();
    assertTrue(iter.hasTop());
    assertEquals("6", iter.getTopValue().toString());
    iter.next();
    assertFalse(iter.hasTop());

    // Same keys, both bounds exclusive: "6" is no longer returned.
    iter.seek(new Range(newKey(0, 0), false, newKey(1, 1), false), EMPTY_COL_FAMS, false);
    assertTrue(iter.hasTop());
    assertEquals("5", iter.getTopValue().toString());
    iter.next();
    assertFalse(iter.hasTop());

    // From key (1, 2) inclusive to key (1, 1) exclusive: only "5".
    iter.seek(new Range(newKey(1, 2), true, newKey(1, 1), false), EMPTY_COL_FAMS, false);
    assertTrue(iter.hasTop());
    assertEquals("5", iter.getTopValue().toString());
    iter.next();
    assertFalse(iter.hasTop());

    // From key (1, 2) exclusive to key (1, 1) inclusive: only "6".
    iter.seek(new Range(newKey(1, 2), false, newKey(1, 1), true), EMPTY_COL_FAMS, false);
    assertTrue(iter.hasTop());
    assertEquals("6", iter.getTopValue().toString());
    iter.next();
    assertFalse(iter.hasTop());

    // Range covering only the entries added with first argument 2: nothing is returned.
    iter.seek(new Range(newKey(2, 1), true, newKey(2, 0), true), EMPTY_COL_FAMS, false);
    assertFalse(iter.hasTop());

    // Range covering only entries added with first argument 0: nothing is returned.
    iter.seek(new Range(newKey(0, 3), true, newKey(0, 1), true), EMPTY_COL_FAMS, false);
    assertFalse(iter.hasTop());
}
Also used : MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) Value(org.apache.accumulo.core.data.Value) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) TreeMap(java.util.TreeMap) SortedMapIterator(org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) Key(org.apache.accumulo.core.data.Key) Test(org.junit.jupiter.api.Test)

Example 17 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

The class FileCompactor, method compactLocalityGroup.

/**
 * Compacts one locality group: merges the input files (plus the minor-compaction iterator when
 * the scope is {@code minc}) through the configured iterator stack and appends every resulting
 * entry to {@code mfw}.
 *
 * @param lgName
 *          name handed to {@code mfw.startNewLocalityGroup} when {@code inclusive} is true
 * @param columnFamilies
 *          column families passed to the seek and, when {@code inclusive} is true, to the new
 *          locality group
 * @param inclusive
 *          passed to the seek; also selects a named locality group (true) or the default
 *          locality group (false) on the writer
 * @param mfw
 *          writer that receives the compacted entries
 * @param majCStats
 *          accumulator that receives this group's read and written entry counts
 * @throws IOException
 *           declared by the method; not caught or translated here
 * @throws CompactionCanceledException
 *           if compaction is disabled while the iterator still has entries; the writer is closed
 *           and deletion of the output file is attempted before this is thrown
 */
private void compactLocalityGroup(String lgName, Set<ByteSequence> columnFamilies, boolean inclusive, FileSKVWriter mfw, CompactionStats majCStats) throws IOException, CompactionCanceledException {
    // Collects every reader opened below so the outer finally can close them all.
    ArrayList<FileSKVIterator> readers = new ArrayList<>(filesToCompact.size());
    Span compactSpan = TraceUtil.startSpan(this.getClass(), "compact");
    try (Scope span = compactSpan.makeCurrent()) {
        long entriesCompacted = 0;
        List<SortedKeyValueIterator<Key, Value>> iters = openMapDataFiles(readers);
        if (env.getIteratorScope() == IteratorScope.minc) {
            iters.add(env.getMinCIterator());
        }
        // Iterator stack, innermost first: merge of all sources -> read counter -> delete
        // handling -> column family skipping -> iterators loaded from the table configuration.
        CountingIterator citr = new CountingIterator(new MultiIterator(iters, extent.toDataRange()), entriesRead);
        SortedKeyValueIterator<Key, Value> delIter = DeletingIterator.wrap(citr, propagateDeletes, DeletingIterator.getBehavior(acuTableConf));
        ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
        SystemIteratorEnvironment iterEnv = env.createIteratorEnv(context, acuTableConf, getExtent().tableId());
        SortedKeyValueIterator<Key, Value> itr = iterEnv.getTopLevelIterator(IterConfigUtil.convertItersAndLoad(env.getIteratorScope(), cfsi, acuTableConf, iterators, iterEnv));
        itr.seek(extent.toDataRange(), columnFamilies, inclusive);
        if (inclusive) {
            mfw.startNewLocalityGroup(lgName, columnFamilies);
        } else {
            mfw.startDefaultLocalityGroup();
        }
        Span writeSpan = TraceUtil.startSpan(this.getClass(), "write");
        try (Scope write = writeSpan.makeCurrent()) {
            // Copy entries until the source is exhausted or compaction gets disabled.
            while (itr.hasTop() && env.isCompactionEnabled()) {
                mfw.append(itr.getTopKey(), itr.getTopValue());
                itr.next();
                entriesCompacted++;
                if (entriesCompacted % 1024 == 0) {
                    // Periodically update stats; do not want to do this too often since it is volatile.
                    // NOTE(review): a final partial batch (fewer than 1024 entries) is not added to
                    // entriesWritten in this method - confirm that is intended.
                    entriesWritten.addAndGet(1024);
                }
            }
            // Entries remain but compaction was disabled: treat it as a cancellation.
            if (itr.hasTop() && !env.isCompactionEnabled()) {
                // cancel major compaction operation
                try {
                    try {
                        mfw.close();
                    } catch (IOException e) {
                        // A failed close is only logged; deletion of the output is still attempted.
                        log.error("{}", e.getMessage(), e);
                    }
                    fs.deleteRecursively(outputFile.getPath());
                } catch (Exception e) {
                    // Cleanup is best effort; the cancellation below is thrown regardless.
                    log.warn("Failed to delete Canceled compaction output file {}", outputFile, e);
                }
                throw new CompactionCanceledException();
            }
        } finally {
            // Runs on success, cancellation and failure alike, so the counts are always recorded.
            CompactionStats lgMajcStats = new CompactionStats(citr.getCount(), entriesCompacted);
            majCStats.add(lgMajcStats);
            writeSpan.end();
        }
    } catch (Exception e) {
        // Report the failure on the compact span, then rethrow it unchanged.
        TraceUtil.setException(compactSpan, e, true);
        throw e;
    } finally {
        // close sequence files opened
        for (FileSKVIterator reader : readers) {
            try {
                reader.close();
            } catch (Exception e) {
                // One failing reader must not prevent closing the remaining ones.
                log.warn("Failed to close map file", e);
            }
        }
        compactSpan.end();
    }
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) ColumnFamilySkippingIterator(org.apache.accumulo.core.iteratorsImpl.system.ColumnFamilySkippingIterator) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) Span(io.opentelemetry.api.trace.Span) IOException(java.io.IOException) IteratorScope(org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope) Scope(io.opentelemetry.context.Scope) Value(org.apache.accumulo.core.data.Value) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Key(org.apache.accumulo.core.data.Key) SystemIteratorEnvironment(org.apache.accumulo.server.iterators.SystemIteratorEnvironment)

Example 18 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

The class FileUtil, method estimatePercentageLTE.

/**
 * Estimates which fraction of the counted entries has a row less than or equal to
 * {@code splitRow}, ignoring entries whose row is at or before {@code prevEndRow}.
 *
 * <p>
 * When more files are supplied than the configured open-file limit allows, they are first
 * reduced into a temporary directory, which is cleaned up before returning.
 *
 * @param context
 *          server context supplying configuration, Hadoop configuration and the volume manager
 * @param tabletDir
 *          tablet directory used when a temporary directory has to be created
 * @param prevEndRow
 *          rows less than or equal to this row are skipped; {@code null} is replaced by an
 *          empty {@link Text}
 * @param endRow
 *          end row forwarded to the reduce and count helpers
 * @param mapFiles
 *          files to inspect
 * @param splitRow
 *          row to measure against
 * @return {@code .5} when no entries were counted; otherwise
 *         {@code (numLte + 1) / (numKeys + 2)}, which is never exactly 0 or 1
 * @throws IOException
 *           declared by the method; not caught or translated here
 * @throws RuntimeException
 *           if more entries are found at or below {@code splitRow} than were counted in total
 */
public static double estimatePercentageLTE(ServerContext context, String tabletDir, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, Text splitRow) throws IOException {
    Path tmpDir = null;
    final int openLimit = context.getConfiguration().getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
    final ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());
    try {
        if (mapFiles.size() > openLimit) {
            // Too many files to open at once: reduce them into a temporary directory first.
            tmpDir = createTmpDir(context, tabletDir);
            log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}", mapFiles.size(), endRow, prevEndRow, tmpDir);
            final long reduceStart = System.currentTimeMillis();
            mapFiles = reduceFiles(context, context.getHadoopConf(), prevEndRow, endRow, mapFiles, openLimit, tmpDir, 0);
            final long reduceEnd = System.currentTimeMillis();
            log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow, String.format("%6.2f secs", (reduceEnd - reduceStart) / 1000.0));
        }

        if (prevEndRow == null) {
            prevEndRow = new Text();
        }

        // countIndexEntries also fills 'readers', which the finally block hands to cleanup.
        final long numKeys = countIndexEntries(context, prevEndRow, endRow, mapFiles, true, readers);
        if (numKeys == 0) {
            // Nothing was counted, so there is no basis for an estimate: punt and return .5
            return .5;
        }

        final List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(readers);
        final MultiIterator merged = new MultiIterator(sources, true);

        // skip the prevEndRow
        while (merged.hasTop() && merged.getTopKey().compareRow(prevEndRow) <= 0) {
            merged.next();
        }

        // Count every remaining entry whose row is at or before the split row.
        int numLte = 0;
        for (; merged.hasTop() && merged.getTopKey().compareRow(splitRow) <= 0; merged.next()) {
            numLte++;
        }

        if (numLte > numKeys) {
            // something went wrong
            throw new RuntimeException("numLte > numKeys " + numLte + " " + numKeys + " " + prevEndRow + " " + endRow + " " + splitRow + " " + mapFiles);
        }

        // do not want to return 0% or 100%, so add 1 and 2 below
        return (numLte + 1) / (double) (numKeys + 2);
    } finally {
        cleanupIndexOp(tmpDir, context.getVolumeManager(), readers);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) Text(org.apache.hadoop.io.Text)

Example 19 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

The class FileUtil, method findMidPoint.

/**
 * Finds a mid point key for the given files by reading half of the counted entries in merged
 * order.
 *
 * <p>
 * The result always maps {@code .5} to the key reached after reading half of the entries. It
 * may additionally map a smaller fraction to the last key seen before a row change, provided
 * that key's position is at least {@code minSplit} of the total. If nothing is found via the
 * indexes the method retries once on the data files using the original file list; if that also
 * finds nothing an empty map is returned.
 *
 * <p>
 * ISSUES: This method uses the index files to find the mid point. If the map files have
 * different index intervals it will not return an accurate mid point. It would also be tricky
 * to use in conjunction with an in-memory map because the indexing interval is unknown.
 *
 * @param context
 *          server context supplying configuration, Hadoop configuration and the volume manager
 * @param tabletDirectory
 *          tablet directory used when a temporary directory has to be created
 * @param prevEndRow
 *          rows less than or equal to this row are skipped; {@code null} is replaced by an
 *          empty {@link Text}
 * @param endRow
 *          upper row bound used by the sanity check; {@code null} disables the upper bound
 * @param mapFiles
 *          list of map files in which to find the mid point key
 * @param minSplit
 *          minimum position fraction for the additional pre-mid-point candidate
 * @param useIndex
 *          whether to work from the indexes; required when the files exceed the open limit
 * @return candidate split keys by position fraction, or an empty map when no entries exist
 * @throws IOException
 *           if there are too many files to open without indexes, or a found key lies outside
 *           {@code (prevEndRow, endRow]}
 */
public static SortedMap<Double, Key> findMidPoint(ServerContext context, String tabletDirectory, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, double minSplit, boolean useIndex) throws IOException {
    final Collection<TabletFile> originalFiles = mapFiles;
    Path tmpDir = null;
    final int openLimit = context.getConfiguration().getCount(Property.TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN);
    final ArrayList<FileSKVIterator> readers = new ArrayList<>(mapFiles.size());
    try {
        if (mapFiles.size() > openLimit) {
            if (!useIndex) {
                throw new IOException("Cannot find mid point using data files, too many " + mapFiles.size());
            }
            tmpDir = createTmpDir(context, tabletDirectory);
            log.debug("Too many indexes ({}) to open at once for {} {}, reducing in tmpDir = {}", mapFiles.size(), endRow, prevEndRow, tmpDir);
            final long reduceStart = System.currentTimeMillis();
            mapFiles = reduceFiles(context, context.getHadoopConf(), prevEndRow, endRow, mapFiles, openLimit, tmpDir, 0);
            final long reduceEnd = System.currentTimeMillis();
            log.debug("Finished reducing indexes for {} {} in {}", endRow, prevEndRow, String.format("%6.2f secs", (reduceEnd - reduceStart) / 1000.0));
        }

        if (prevEndRow == null) {
            prevEndRow = new Text();
        }

        final long searchStart = System.currentTimeMillis();

        // Once files were reduced into tmpDir the flag is forced to false.
        final boolean indexFlag = tmpDir == null && useIndex;
        final long numKeys = countIndexEntries(context, prevEndRow, endRow, mapFiles, indexFlag, readers);

        if (numKeys == 0) {
            if (!useIndex) {
                return Collections.emptySortedMap();
            }
            log.warn("Failed to find mid point using indexes, falling back to" + " data files which is slower. No entries between {} and {} for {}", prevEndRow, endRow, mapFiles);
            // need to pass original map files, not possibly reduced indexes
            return findMidPoint(context, tabletDirectory, prevEndRow, endRow, originalFiles, minSplit, false);
        }

        final List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(readers);
        final MultiIterator merged = new MultiIterator(sources, true);

        // skip the prevEndRow
        while (merged.hasTop() && merged.getTopKey().compareRow(prevEndRow) <= 0) {
            merged.next();
        }

        // read half of the keys in the index
        Key lastKey = null;
        Key keyBeforeMidPoint = null;
        long keyBeforeMidPointPosition = 0;
        for (long keysRead = 0; keysRead < numKeys / 2; keysRead++) {
            // Remember the previous key whenever the row changes far enough into the data.
            if (lastKey != null && !lastKey.equals(merged.getTopKey(), PartialKey.ROW) && (keysRead - 1) / (double) numKeys >= minSplit) {
                keyBeforeMidPoint = new Key(lastKey);
                keyBeforeMidPointPosition = keysRead - 1;
            }
            if (lastKey == null) {
                lastKey = new Key();
            }
            lastKey.set(merged.getTopKey());
            // consume minimum
            merged.next();
        }

        final TreeMap<Double, Key> candidates = new TreeMap<>();
        if (keyBeforeMidPoint != null) {
            candidates.put(keyBeforeMidPointPosition / (double) numKeys, keyBeforeMidPoint);
        }

        final long searchEnd = System.currentTimeMillis();
        log.debug(String.format("Found midPoint from indexes in %6.2f secs.%n", ((searchEnd - searchStart) / 1000.0)));

        candidates.put(.5, merged.getTopKey());

        // sanity check: every candidate must lie in (prevEndRow, endRow]
        for (Key candidate : candidates.values()) {
            final boolean outOfRange = candidate.compareRow(prevEndRow) <= 0 || (endRow != null && candidate.compareRow(endRow) >= 1);
            if (outOfRange) {
                throw new IOException("Found mid point is not in range " + candidate + " " + prevEndRow + " " + endRow + " " + mapFiles);
            }
        }
        return candidates;
    } finally {
        cleanupIndexOp(tmpDir, context.getVolumeManager(), readers);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TreeMap(java.util.TreeMap) TabletFile(org.apache.accumulo.core.metadata.TabletFile) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 20 with MultiIterator

use of org.apache.accumulo.core.iteratorsImpl.system.MultiIterator in project accumulo by apache.

The class CollectTabletStats, method createScanIterator.

/**
 * Builds the iterator stack used for a scan over the given file iterators.
 *
 * <p>
 * The supplied iterators, plus one iterator over an empty map, are merged within {@code ke} and
 * then wrapped, in order, with delete processing, column family skipping, column qualifier
 * filtering and visibility filtering. When {@code useTableIterators} is set, the scan-scope
 * iterators described by {@code ssiList}, {@code ssio} and {@code conf} are loaded on top.
 *
 * @param ke
 *          extent handed to the merging iterator
 * @param mapfiles
 *          source iterators to merge
 * @param authorizations
 *          authorizations given to the visibility filter
 * @param defaultLabels
 *          default labels given to the visibility filter
 * @param columnSet
 *          columns given to the column qualifier filter
 * @param ssiList
 *          iterator descriptions used only when {@code useTableIterators} is true
 * @param ssio
 *          iterator options used only when {@code useTableIterators} is true
 * @param useTableIterators
 *          whether to load the configured scan iterators on top of the system stack
 * @param conf
 *          table configuration used only when {@code useTableIterators} is true
 * @return the top of the assembled iterator stack
 * @throws IOException
 *           declared by the method; not caught or translated here
 */
private static SortedKeyValueIterator<Key, Value> createScanIterator(KeyExtent ke, Collection<SortedKeyValueIterator<Key, Value>> mapfiles, Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, boolean useTableIterators, TableConfiguration conf) throws IOException {
    // An iterator over an empty map is merged in alongside the file iterators.
    SortedMapIterator emptySource = new SortedMapIterator(new TreeMap<>());

    List<SortedKeyValueIterator<Key, Value>> sources = new ArrayList<>(mapfiles.size() + 1);
    sources.addAll(mapfiles);
    sources.add(emptySource);

    // System iterator stack, innermost first.
    MultiIterator merged = new MultiIterator(sources, ke);
    SortedKeyValueIterator<Key, Value> deletesApplied = DeletingIterator.wrap(merged, false, Behavior.PROCESS);
    ColumnFamilySkippingIterator familySkipping = new ColumnFamilySkippingIterator(deletesApplied);
    SortedKeyValueIterator<Key, Value> columnFiltered = ColumnQualifierFilter.wrap(familySkipping, columnSet);
    SortedKeyValueIterator<Key, Value> visibilityFiltered = VisibilityFilter.wrap(columnFiltered, authorizations, defaultLabels);

    if (!useTableIterators) {
        return visibilityFiltered;
    }

    IterLoad scanIterators = IterConfigUtil.loadIterConf(IteratorScope.scan, ssiList, ssio, conf);
    return IterConfigUtil.loadIterators(visibilityFiltered, scanIterators.useAccumuloClassLoader(true));
}
Also used : ColumnFamilySkippingIterator(org.apache.accumulo.core.iteratorsImpl.system.ColumnFamilySkippingIterator) IterLoad(org.apache.accumulo.core.conf.IterLoad) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) ArrayList(java.util.ArrayList) Value(org.apache.accumulo.core.data.Value) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) SortedMapIterator(org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator) Key(org.apache.accumulo.core.data.Key)

Aggregations

MultiIterator (org.apache.accumulo.core.iteratorsImpl.system.MultiIterator)20 Key (org.apache.accumulo.core.data.Key)19 Value (org.apache.accumulo.core.data.Value)18 ArrayList (java.util.ArrayList)17 SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator)17 Range (org.apache.accumulo.core.data.Range)11 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)8 SortedMapIterator (org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator)8 Text (org.apache.hadoop.io.Text)8 Test (org.junit.jupiter.api.Test)8 TreeMap (java.util.TreeMap)7 IOException (java.io.IOException)6 Path (org.apache.hadoop.fs.Path)6 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)4 IterLoad (org.apache.accumulo.core.conf.IterLoad)4 TreeSet (java.util.TreeSet)3 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)3 TabletFile (org.apache.accumulo.core.metadata.TabletFile)3 Parameter (com.beust.jcommander.Parameter)2 AutoService (com.google.auto.service.AutoService)2