Search in sources:

Example 1 with CachableBuilder

Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.

The class SummaryReader, method load().

public static SummaryReader load(FileSystem fs, Configuration conf, SummarizerFactory factory, Path file, Predicate<SummarizerConfiguration> summarySelector, BlockCache summaryCache, BlockCache indexCache, Cache<String, Long> fileLenCache, CryptoService cryptoService) {
    CachableBlockFile.Reader bcReader = null;
    try {
        // the reason BCFile is used instead of RFile is to avoid reading in the RFile meta block when
        // only summary data is wanted.
        CompositeCache compositeCache = new CompositeCache(summaryCache, indexCache);
        CachableBuilder cb = new CachableBuilder().fsPath(fs, file).conf(conf).fileLen(fileLenCache).cacheProvider(new BasicCacheProvider(compositeCache, null)).cryptoService(cryptoService);
        bcReader = new CachableBlockFile.Reader(cb);
        return load(bcReader, summarySelector, factory);
    } catch (FileNotFoundException fne) {
        return getEmptyReader(factory);
    } catch (IOException e) {
        try {
            if (!fs.exists(file)) {
                return getEmptyReader(factory);
            }
        } catch (IOException e1) {
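            // Ignored: if the file still exists, fall through and rethrow the original exception.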
        }
        throw new UncheckedIOException(e);
    } finally {
        if (bcReader != null) {
            try {
                bcReader.close();
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        }
    }
}
Also used: BasicCacheProvider(org.apache.accumulo.core.file.blockfile.impl.BasicCacheProvider) FileNotFoundException(java.io.FileNotFoundException) CachableBlockFile(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) CachableBuilder(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder)
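
The builder supports two ways of supplying the underlying data: a filesystem path (fsPath) or an already-open stream (input). A minimal sketch of the path-based variant, distilled from load() above; the helper name openBlockReader is illustrative, but the builder calls and the Reader constructor are exactly those used in the example:

static CachableBlockFile.Reader openBlockReader(FileSystem fs, Path file, Configuration conf, CryptoService cryptoService) throws IOException {
    // Resolve the stream lazily from the filesystem path; no caches are wired in here.
    CachableBuilder cb = new CachableBuilder()
            .fsPath(fs, file)
            .conf(conf)
            .cryptoService(cryptoService);
    return new CachableBlockFile.Reader(cb);
}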

Example 2 with CachableBuilder

Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.

The class MultiLevelIndexTest, method runTest().

private void runTest(int maxBlockSize, int num) throws IOException {
    AccumuloConfiguration aconf = DefaultConfiguration.getInstance();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    FSDataOutputStream dos = new FSDataOutputStream(baos, new FileSystem.Statistics("a"));
    BCFile.Writer _cbw = new BCFile.Writer(dos, null, "gz", hadoopConf, CryptoServiceFactory.newInstance(aconf, JAVA));
    BufferedWriter mliw = new BufferedWriter(new Writer(_cbw, maxBlockSize));
    for (int i = 0; i < num; i++) {
        mliw.add(new Key(String.format("%05d000", i)), i, 0, 0, 0);
    }
    mliw.addLast(new Key(String.format("%05d000", num)), num, 0, 0, 0);
    BCFile.Writer.BlockAppender root = _cbw.prepareMetaBlock("root");
    mliw.close(root);
    root.close();
    _cbw.close();
    dos.close();
    baos.close();
    byte[] data = baos.toByteArray();
    SeekableByteArrayInputStream bais = new SeekableByteArrayInputStream(data);
    FSDataInputStream in = new FSDataInputStream(bais);
    CachableBuilder cb = new CachableBuilder().input(in, "source-1").length(data.length).conf(hadoopConf).cryptoService(CryptoServiceFactory.newInstance(aconf, JAVA));
    CachableBlockFile.Reader _cbr = new CachableBlockFile.Reader(cb);
    Reader reader = new Reader(_cbr, RFile.RINDEX_VER_8);
    CachableBlockFile.CachedBlockRead rootIn = _cbr.getMetaBlock("root");
    reader.readFields(rootIn);
    rootIn.close();
    IndexIterator liter = reader.lookup(new Key("000000"));
    int count = 0;
    while (liter.hasNext()) {
        assertEquals(count, liter.nextIndex());
        assertEquals(count, liter.peek().getNumEntries());
        assertEquals(count, liter.next().getNumEntries());
        count++;
    }
    assertEquals(num + 1, count);
    while (liter.hasPrevious()) {
        count--;
        assertEquals(count, liter.previousIndex());
        assertEquals(count, liter.peekPrevious().getNumEntries());
        assertEquals(count, liter.previous().getNumEntries());
    }
    assertEquals(0, count);
    // go past the end
    liter = reader.lookup(new Key(String.format("%05d000", num + 1)));
    assertFalse(liter.hasNext());
    random.ints(100, 0, num * 1_000).forEach(k -> {
        int expected;
        if (k % 1000 == 0) {
            // end key is inclusive
            expected = k / 1000;
        } else {
            expected = k / 1000 + 1;
        }
        try {
            IndexEntry ie = reader.lookup(new Key(String.format("%08d", k))).next();
            assertEquals(expected, ie.getNumEntries());
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    });
}
Also used: Reader(org.apache.accumulo.core.file.rfile.MultiLevelIndex.Reader) IndexEntry(org.apache.accumulo.core.file.rfile.MultiLevelIndex.IndexEntry) UncheckedIOException(java.io.UncheckedIOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IndexIterator(org.apache.accumulo.core.file.rfile.MultiLevelIndex.Reader.IndexIterator) IOException(java.io.IOException) BCFile(org.apache.accumulo.core.file.rfile.bcfile.BCFile) BufferedWriter(org.apache.accumulo.core.file.rfile.MultiLevelIndex.BufferedWriter) FileSystem(org.apache.hadoop.fs.FileSystem) SeekableByteArrayInputStream(org.apache.accumulo.core.file.rfile.RFileTest.SeekableByteArrayInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) CachableBlockFile(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) CachableBuilder(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder) Writer(org.apache.accumulo.core.file.rfile.MultiLevelIndex.Writer) Key(org.apache.accumulo.core.data.Key) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)
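
A minimal sketch of the in-memory read path the test exercises: wrap a byte array in a seekable stream, describe it to CachableBuilder with input() and length(), then read a named meta block back. The helper name readRootBlock is illustrative; the builder calls, the Reader, and getMetaBlock() are the ones used above.

static void readRootBlock(byte[] data, Configuration conf, CryptoService cs) throws IOException {
    // An in-memory, seekable view over the serialized file bytes.
    FSDataInputStream in = new FSDataInputStream(new SeekableByteArrayInputStream(data));
    CachableBuilder cb = new CachableBuilder()
            .input(in, "source-1") // a name identifying this stream, as in the test above
            .length(data.length)
            .conf(conf)
            .cryptoService(cs);
    CachableBlockFile.Reader reader = new CachableBlockFile.Reader(cb);
    CachableBlockFile.CachedBlockRead root = reader.getMetaBlock("root");
    // Deserialize from root here, e.g. someIndex.readFields(root) as the test does.
    root.close();
    reader.close();
}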

Example 3 with CachableBuilder

Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.

The class RFileScanner, method iterator().

@Override
public Iterator<Entry<Key, Value>> iterator() {
    try {
        RFileSource[] sources = opts.in.getSources();
        List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(sources.length);
        CacheProvider cacheProvider = new BasicCacheProvider(indexCache, dataCache);
        for (int i = 0; i < sources.length; i++) {
            // TODO may have been a bug with multiple files and caching in older version...
            FSDataInputStream inputStream = (FSDataInputStream) sources[i].getInputStream();
            CachableBuilder cb = new CachableBuilder().input(inputStream, "source-" + i).length(sources[i].getLength()).conf(opts.in.getConf()).cacheProvider(cacheProvider).cryptoService(cryptoService);
            readers.add(new RFile.Reader(cb));
        }
        if (getSamplerConfiguration() != null) {
            for (int i = 0; i < readers.size(); i++) {
                readers.set(i, ((Reader) readers.get(i)).getSample(new SamplerConfigurationImpl(getSamplerConfiguration())));
            }
        }
        SortedKeyValueIterator<Key, Value> iterator;
        if (opts.bounds != null) {
            iterator = new MultiIterator(readers, opts.bounds);
        } else {
            iterator = new MultiIterator(readers, false);
        }
        Set<ByteSequence> families = Collections.emptySet();
        if (opts.useSystemIterators) {
            SortedSet<Column> cols = this.getFetchedColumns();
            families = LocalityGroupUtil.families(cols);
            iterator = SystemIteratorUtil.setupSystemScanIterators(iterator, cols, getAuthorizations(), EMPTY_BYTES, tableConf);
        }
        try {
            if (opts.tableConfig != null && !opts.tableConfig.isEmpty()) {
                IterLoad il = IterConfigUtil.loadIterConf(IteratorScope.scan, serverSideIteratorList, serverSideIteratorOptions, tableConf);
                iterator = IterConfigUtil.loadIterators(iterator, il.iterEnv(new IterEnv()).useAccumuloClassLoader(true));
            } else {
                iterator = IterConfigUtil.loadIterators(iterator, new IterLoad().iters(serverSideIteratorList).iterOpts(serverSideIteratorOptions).iterEnv(new IterEnv()).useAccumuloClassLoader(false));
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        iterator.seek(getRange() == null ? EMPTY_RANGE : getRange(), families, !families.isEmpty());
        return new IteratorAdapter(iterator);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used: IteratorAdapter(org.apache.accumulo.core.iterators.IteratorAdapter) BasicCacheProvider(org.apache.accumulo.core.file.blockfile.impl.BasicCacheProvider) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) ArrayList(java.util.ArrayList) RFile(org.apache.accumulo.core.file.rfile.RFile) Column(org.apache.accumulo.core.data.Column) CachableBuilder(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) CacheProvider(org.apache.accumulo.core.file.blockfile.impl.CacheProvider) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) IterLoad(org.apache.accumulo.core.conf.IterLoad) Value(org.apache.accumulo.core.data.Value) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Key(org.apache.accumulo.core.data.Key) ByteSequence(org.apache.accumulo.core.data.ByteSequence)
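
Because every reader is built with the same CacheProvider, all of the scanner's sources share one index cache and one data cache. A minimal sketch of that pattern, distilled from the loop above; openReaders is an illustrative helper name, and the two caches are assumed to be existing BlockCache instances:

static List<RFile.Reader> openReaders(RFileSource[] sources, Configuration conf, BlockCache indexCache, BlockCache dataCache, CryptoService cs) throws IOException {
    CacheProvider cacheProvider = new BasicCacheProvider(indexCache, dataCache);
    List<RFile.Reader> readers = new ArrayList<>(sources.length);
    for (int i = 0; i < sources.length; i++) {
        CachableBuilder cb = new CachableBuilder()
                .input((FSDataInputStream) sources[i].getInputStream(), "source-" + i) // unique name per source
                .length(sources[i].getLength())
                .conf(conf)
                .cacheProvider(cacheProvider) // shared across all readers
                .cryptoService(cs);
        readers.add(new RFile.Reader(cb));
    }
    return readers;
}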

Example 4 with CachableBuilder

Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.

The class PrintInfo, method execute().

@SuppressFBWarnings(value = "DM_EXIT", justification = "System.exit is fine here because it's a utility class executed by a main()")
@Override
public void execute(final String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs("accumulo rfile-info", args);
    if (opts.files.isEmpty()) {
        System.err.println("No files were given");
        System.exit(1);
    }
    if ((opts.fullKeys || opts.dump) && opts.formatterClazz != null) {
        System.err.println("--formatter argument is incompatible with --dump or --fullKeys, specify either, not both.");
        System.exit(1);
    }
    var siteConfig = opts.getSiteConfiguration();
    Configuration conf = new Configuration();
    for (String confFile : opts.configFiles) {
        log.debug("Adding Hadoop configuration file {}", confFile);
        conf.addResource(new Path(confFile));
    }
    LogHistogram kvHistogram = new LogHistogram();
    KeyStats dataKeyStats = new KeyStats();
    KeyStats indexKeyStats = new KeyStats();
    for (String arg : opts.files) {
        Path path = new Path(arg);
        FileSystem fs = resolveFS(log, conf, path);
        System.out.println("Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
        printCryptoParams(path, fs);
        CachableBuilder cb = new CachableBuilder().fsPath(fs, path).conf(conf).cryptoService(CryptoServiceFactory.newInstance(siteConfig, ClassloaderType.JAVA));
        Reader iter = new RFile.Reader(cb);
        MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();
        if (opts.vis || opts.hash) {
            iter.registerMetrics(vmg);
        }
        iter.printInfo(opts.printIndex);
        System.out.println();
        String propsPath = opts.getPropertiesPath();
        String[] mainArgs = propsPath == null ? new String[] { arg } : new String[] { "-props", propsPath, arg };
        org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(mainArgs);
        Map<String, ArrayList<ByteSequence>> localityGroupCF = null;
        if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats || opts.fullKeys || !StringUtils.isEmpty(opts.formatterClazz)) {
            localityGroupCF = iter.getLocalityGroupCF();
            FileSKVIterator dataIter;
            if (opts.useSample) {
                dataIter = iter.getSample();
                if (dataIter == null) {
                    System.out.println("ERROR : This rfile has no sample data");
                    return;
                }
            } else {
                dataIter = iter;
            }
            if (opts.keyStats) {
                FileSKVIterator indexIter = iter.getIndex();
                while (indexIter.hasTop()) {
                    indexKeyStats.add(indexIter.getTopKey());
                    indexIter.next();
                }
            }
            BiFunction<Key, Value, String> formatter = null;
            if (opts.formatterClazz != null) {
                final Class<? extends BiFunction<Key, Value, String>> formatterClass = getFormatter(opts.formatterClazz);
                formatter = formatterClass.getConstructor().newInstance();
            } else if (opts.fullKeys) {
                formatter = (key, value) -> key.toStringNoTruncate() + " -> " + value;
            } else if (opts.dump) {
                formatter = (key, value) -> key + " -> " + value;
            }
            for (String lgName : localityGroupCF.keySet()) {
                LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF);
                while (dataIter.hasTop()) {
                    Key key = dataIter.getTopKey();
                    Value value = dataIter.getTopValue();
                    if (formatter != null) {
                        System.out.println(formatter.apply(key, value));
                        if (System.out.checkError())
                            return;
                    }
                    if (opts.histogram) {
                        kvHistogram.add(key.getSize() + value.getSize());
                    }
                    if (opts.keyStats) {
                        dataKeyStats.add(key);
                    }
                    dataIter.next();
                }
            }
        }
        if (opts.printSummary) {
            SummaryReader.print(iter, System.out);
        }
        iter.close();
        if (opts.vis || opts.hash) {
            System.out.println();
            vmg.printMetrics(opts.hash, "Visibility", System.out);
        }
        if (opts.histogram) {
            System.out.println();
            kvHistogram.print("");
        }
        if (opts.keyStats) {
            System.out.println();
            System.out.println("Statistics for keys in data :");
            dataKeyStats.print("\t");
            System.out.println();
            System.out.println("Statistics for keys in index :");
            indexKeyStats.print("\t");
        }
        // If the output stream has closed, there is no reason to keep going.
        if (System.out.checkError()) {
            return;
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) ConfigOpts(org.apache.accumulo.core.cli.ConfigOpts) ByteSequence(org.apache.accumulo.core.data.ByteSequence) Arrays(java.util.Arrays) CryptoUtils(org.apache.accumulo.core.crypto.CryptoUtils) NoFileEncrypter(org.apache.accumulo.core.spi.crypto.NoFileEncrypter) Parameter(com.beust.jcommander.Parameter) FileSystem(org.apache.hadoop.fs.FileSystem) BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocalityGroupUtil(org.apache.accumulo.core.util.LocalityGroupUtil) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key) Configuration(org.apache.hadoop.conf.Configuration) Value(org.apache.accumulo.core.data.Value) KeywordExecutable(org.apache.accumulo.start.spi.KeywordExecutable) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Utils(org.apache.accumulo.core.file.rfile.bcfile.Utils) Logger(org.slf4j.Logger) NumUtil(org.apache.accumulo.core.util.NumUtil) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) IOException(java.io.IOException) ClassloaderType(org.apache.accumulo.core.crypto.CryptoServiceFactory.ClassloaderType) SummaryReader(org.apache.accumulo.core.summary.SummaryReader) CryptoServiceFactory(org.apache.accumulo.core.crypto.CryptoServiceFactory) CachableBuilder(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder) Range(org.apache.accumulo.core.data.Range) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) List(java.util.List) AutoService(com.google.auto.service.AutoService) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)
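
The reflective instantiation above means any public class with a no-argument constructor that implements BiFunction<Key, Value, String> can be passed to --formatter. A minimal sketch of such a formatter; the class name and its output format are illustrative:

public class RowAndTimestampFormatter implements BiFunction<Key, Value, String> {
    @Override
    public String apply(Key key, Value value) {
        // Print only the row and timestamp, ignoring the value.
        return key.getRow() + " @ " + key.getTimestamp();
    }
}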

Example 5 with CachableBuilder

Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.

The class SplitLarge, method execute().

@Override
public void execute(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Opts opts = new Opts();
    opts.parseArgs("accumulo split-large", args);
    for (String file : opts.files) {
        AccumuloConfiguration aconf = opts.getSiteConfiguration();
        CryptoService cryptoService = CryptoServiceFactory.newInstance(aconf, CryptoServiceFactory.ClassloaderType.JAVA);
        Path path = new Path(file);
        CachableBuilder cb = new CachableBuilder().fsPath(fs, path).conf(conf).cryptoService(cryptoService);
        try (Reader iter = new RFile.Reader(cb)) {
            if (!file.endsWith(".rf")) {
                throw new IllegalArgumentException("File must end with .rf");
            }
            String smallName = file.substring(0, file.length() - 3) + "_small.rf";
            String largeName = file.substring(0, file.length() - 3) + "_large.rf";
            int blockSize = (int) aconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE);
            try (Writer small = new RFile.Writer(new BCFile.Writer(fs.create(new Path(smallName)), null, "gz", conf, cryptoService), blockSize);
                Writer large = new RFile.Writer(new BCFile.Writer(fs.create(new Path(largeName)), null, "gz", conf, cryptoService), blockSize)) {
                small.startDefaultLocalityGroup();
                large.startDefaultLocalityGroup();
                iter.seek(new Range(), new ArrayList<>(), false);
                while (iter.hasTop()) {
                    Key key = iter.getTopKey();
                    Value value = iter.getTopValue();
                    if (key.getSize() + value.getSize() < opts.maxSize) {
                        small.append(key, value);
                    } else {
                        large.append(key, value);
                    }
                    iter.next();
                }
            }
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ConfigOpts(org.apache.accumulo.core.cli.ConfigOpts) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) BCFile(org.apache.accumulo.core.file.rfile.bcfile.BCFile) Range(org.apache.accumulo.core.data.Range) CryptoService(org.apache.accumulo.core.spi.crypto.CryptoService) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) CachableBuilder(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder) Writer(org.apache.accumulo.core.file.rfile.RFile.Writer) Key(org.apache.accumulo.core.data.Key)
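
The copy loop reduces to routing each entry by its serialized size. A minimal sketch of that core, distilled from the example; splitBySize is an illustrative helper name, and both writers are assumed to already have a locality group started:

static void splitBySize(Reader iter, RFile.Writer small, RFile.Writer large, long maxSize) throws IOException {
    // Full scan: empty range, no column-family filtering.
    iter.seek(new Range(), new ArrayList<>(), false);
    while (iter.hasTop()) {
        Key key = iter.getTopKey();
        Value value = iter.getTopValue();
        // Route by the combined serialized size of the key and value.
        if (key.getSize() + value.getSize() < maxSize) {
            small.append(key, value);
        } else {
            large.append(key, value);
        }
        iter.next();
    }
}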

Aggregations

CachableBuilder (org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder): 6
IOException (java.io.IOException): 4
Key (org.apache.accumulo.core.data.Key): 4
Reader (org.apache.accumulo.core.file.rfile.RFile.Reader): 4
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4
Range (org.apache.accumulo.core.data.Range): 3
Value (org.apache.accumulo.core.data.Value): 3
BasicCacheProvider (org.apache.accumulo.core.file.blockfile.impl.BasicCacheProvider): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2
UncheckedIOException (java.io.UncheckedIOException): 2
ArrayList (java.util.ArrayList): 2
ConfigOpts (org.apache.accumulo.core.cli.ConfigOpts): 2
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration): 2
ByteSequence (org.apache.accumulo.core.data.ByteSequence): 2
CachableBlockFile (org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile): 2
BCFile (org.apache.accumulo.core.file.rfile.bcfile.BCFile): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
Path (org.apache.hadoop.fs.Path): 2
Parameter (com.beust.jcommander.Parameter): 1