Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.
From the class SummaryReader, method load:
public static SummaryReader load(FileSystem fs, Configuration conf, SummarizerFactory factory,
    Path file, Predicate<SummarizerConfiguration> summarySelector, BlockCache summaryCache,
    BlockCache indexCache, Cache<String, Long> fileLenCache, CryptoService cryptoService) {
  CachableBlockFile.Reader bcReader = null;
  try {
    // the reason BCFile is used instead of RFile is to avoid reading in the RFile meta block when
    // only summary data is wanted.
    CompositeCache compositeCache = new CompositeCache(summaryCache, indexCache);
    CachableBuilder cb = new CachableBuilder().fsPath(fs, file).conf(conf).fileLen(fileLenCache)
        .cacheProvider(new BasicCacheProvider(compositeCache, null)).cryptoService(cryptoService);
    bcReader = new CachableBlockFile.Reader(cb);
    return load(bcReader, summarySelector, factory);
  } catch (FileNotFoundException fne) {
    return getEmptyReader(factory);
  } catch (IOException e) {
    try {
      if (!fs.exists(file)) {
        return getEmptyReader(factory);
      }
    } catch (IOException e1) {
      // ignore the failed existence check; the original exception is rethrown below
    }
    throw new UncheckedIOException(e);
  } finally {
    if (bcReader != null) {
      try {
        bcReader.close();
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      }
    }
  }
}
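The method above layers the summary and index caches behind a single CompositeCache before handing them to the builder. For comparison, a minimal sketch of the builder's filesystem-path form without any caching, using a hypothetical path (the fsPath/conf/cryptoService chain and the close-in-finally pattern mirror the usage above):

// Minimal sketch: hypothetical path, no block caches configured.
Path file = new Path("/accumulo/tables/1/default_tablet/F0000000.rf"); // hypothetical
CachableBuilder cb = new CachableBuilder().fsPath(fs, file).conf(conf)
    .cryptoService(cryptoService);
CachableBlockFile.Reader bcReader = new CachableBlockFile.Reader(cb);
try {
  // read meta or data blocks from bcReader here
} finally {
  bcReader.close(); // the reader must be closed to release the underlying stream
}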
Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.
From the class MultiLevelIndexTest, method runTest:
private void runTest(int maxBlockSize, int num) throws IOException {
  AccumuloConfiguration aconf = DefaultConfiguration.getInstance();
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  FSDataOutputStream dos = new FSDataOutputStream(baos, new FileSystem.Statistics("a"));
  BCFile.Writer _cbw = new BCFile.Writer(dos, null, "gz", hadoopConf,
      CryptoServiceFactory.newInstance(aconf, JAVA));
  BufferedWriter mliw = new BufferedWriter(new Writer(_cbw, maxBlockSize));
  for (int i = 0; i < num; i++) {
    mliw.add(new Key(String.format("%05d000", i)), i, 0, 0, 0);
  }
  mliw.addLast(new Key(String.format("%05d000", num)), num, 0, 0, 0);
  BCFile.Writer.BlockAppender root = _cbw.prepareMetaBlock("root");
  mliw.close(root);
  root.close();
  _cbw.close();
  dos.close();
  baos.close();
  byte[] data = baos.toByteArray();
  SeekableByteArrayInputStream bais = new SeekableByteArrayInputStream(data);
  FSDataInputStream in = new FSDataInputStream(bais);
  CachableBuilder cb = new CachableBuilder().input(in, "source-1").length(data.length)
      .conf(hadoopConf).cryptoService(CryptoServiceFactory.newInstance(aconf, JAVA));
  CachableBlockFile.Reader _cbr = new CachableBlockFile.Reader(cb);
  Reader reader = new Reader(_cbr, RFile.RINDEX_VER_8);
  CachableBlockFile.CachedBlockRead rootIn = _cbr.getMetaBlock("root");
  reader.readFields(rootIn);
  rootIn.close();
  IndexIterator liter = reader.lookup(new Key("000000"));
  int count = 0;
  while (liter.hasNext()) {
    assertEquals(count, liter.nextIndex());
    assertEquals(count, liter.peek().getNumEntries());
    assertEquals(count, liter.next().getNumEntries());
    count++;
  }
  assertEquals(num + 1, count);
  while (liter.hasPrevious()) {
    count--;
    assertEquals(count, liter.previousIndex());
    assertEquals(count, liter.peekPrevious().getNumEntries());
    assertEquals(count, liter.previous().getNumEntries());
  }
  assertEquals(0, count);
  // go past the end
  liter = reader.lookup(new Key(String.format("%05d000", num + 1)));
  assertFalse(liter.hasNext());
  random.ints(100, 0, num * 1_000).forEach(k -> {
    int expected;
    if (k % 1000 == 0) {
      // end key is inclusive
      expected = k / 1000;
    } else {
      expected = k / 1000 + 1;
    }
    try {
      IndexEntry ie = reader.lookup(new Key(String.format("%08d", k))).next();
      assertEquals(expected, ie.getNumEntries());
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  });
}
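This test exercises the builder's stream form: instead of a filesystem path, it hands the builder a seekable stream plus an explicit length and a cache-key name. Distilled from the test above, the in-memory round trip reduces to the following sketch (the name "source-1" is arbitrary; any stable identifier works as the cache key):

// Sketch of the stream-based builder form, distilled from the test above.
byte[] data = baos.toByteArray(); // bytes previously written through BCFile.Writer
FSDataInputStream in = new FSDataInputStream(new SeekableByteArrayInputStream(data));
CachableBuilder cb = new CachableBuilder()
    .input(in, "source-1")  // seekable stream plus a cache-key name
    .length(data.length)    // length must be given explicitly; there is no file to stat
    .conf(hadoopConf)
    .cryptoService(CryptoServiceFactory.newInstance(aconf, JAVA));
CachableBlockFile.Reader _cbr = new CachableBlockFile.Reader(cb);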
Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.
From the class RFileScanner, method iterator:
@Override
public Iterator<Entry<Key, Value>> iterator() {
  try {
    RFileSource[] sources = opts.in.getSources();
    List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(sources.length);
    CacheProvider cacheProvider = new BasicCacheProvider(indexCache, dataCache);
    for (int i = 0; i < sources.length; i++) {
      // TODO may have been a bug with multiple files and caching in older version...
      FSDataInputStream inputStream = (FSDataInputStream) sources[i].getInputStream();
      CachableBuilder cb = new CachableBuilder().input(inputStream, "source-" + i)
          .length(sources[i].getLength()).conf(opts.in.getConf()).cacheProvider(cacheProvider)
          .cryptoService(cryptoService);
      readers.add(new RFile.Reader(cb));
    }
    if (getSamplerConfiguration() != null) {
      for (int i = 0; i < readers.size(); i++) {
        readers.set(i, ((Reader) readers.get(i))
            .getSample(new SamplerConfigurationImpl(getSamplerConfiguration())));
      }
    }
    SortedKeyValueIterator<Key, Value> iterator;
    if (opts.bounds != null) {
      iterator = new MultiIterator(readers, opts.bounds);
    } else {
      iterator = new MultiIterator(readers, false);
    }
    Set<ByteSequence> families = Collections.emptySet();
    if (opts.useSystemIterators) {
      SortedSet<Column> cols = this.getFetchedColumns();
      families = LocalityGroupUtil.families(cols);
      iterator = SystemIteratorUtil.setupSystemScanIterators(iterator, cols, getAuthorizations(),
          EMPTY_BYTES, tableConf);
    }
    try {
      if (opts.tableConfig != null && !opts.tableConfig.isEmpty()) {
        IterLoad il = IterConfigUtil.loadIterConf(IteratorScope.scan, serverSideIteratorList,
            serverSideIteratorOptions, tableConf);
        iterator = IterConfigUtil.loadIterators(iterator,
            il.iterEnv(new IterEnv()).useAccumuloClassLoader(true));
      } else {
        iterator = IterConfigUtil.loadIterators(iterator,
            new IterLoad().iters(serverSideIteratorList).iterOpts(serverSideIteratorOptions)
                .iterEnv(new IterEnv()).useAccumuloClassLoader(false));
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    iterator.seek(getRange() == null ? EMPTY_RANGE : getRange(), families, !families.isEmpty());
    return new IteratorAdapter(iterator);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
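Note that every reader above shares one BasicCacheProvider, so index and data blocks are cached across all sources of the scan before MultiIterator merges them into a single sorted view. Callers normally reach this iterator() through the public builder in org.apache.accumulo.core.client.rfile.RFile (a different class from the internal RFile reader used above). A minimal sketch of that entry point, assuming a hypothetical local file and default options:

// Hedged sketch of the public API that backs RFileScanner; the path is
// hypothetical, and RFile here is org.apache.accumulo.core.client.rfile.RFile.
try (Scanner scanner = RFile.newScanner().from("/tmp/example.rf").build()) {
  for (Entry<Key, Value> entry : scanner) {
    System.out.println(entry.getKey() + " -> " + entry.getValue());
  }
}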
Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.
From the class PrintInfo, method execute:
@SuppressFBWarnings(value = "DM_EXIT",
    justification = "System.exit is fine here because it's a utility class executed by a main()")
@Override
public void execute(final String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs("accumulo rfile-info", args);
  if (opts.files.isEmpty()) {
    System.err.println("No files were given");
    System.exit(1);
  }
  if ((opts.fullKeys || opts.dump) && opts.formatterClazz != null) {
    System.err.println(
        "--formatter argument is incompatible with --dump or --fullKeys, specify either, not both.");
    System.exit(1);
  }
  var siteConfig = opts.getSiteConfiguration();
  Configuration conf = new Configuration();
  for (String confFile : opts.configFiles) {
    log.debug("Adding Hadoop configuration file {}", confFile);
    conf.addResource(new Path(confFile));
  }
  LogHistogram kvHistogram = new LogHistogram();
  KeyStats dataKeyStats = new KeyStats();
  KeyStats indexKeyStats = new KeyStats();
  for (String arg : opts.files) {
    Path path = new Path(arg);
    FileSystem fs = resolveFS(log, conf, path);
    System.out
        .println("Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
    printCryptoParams(path, fs);
    CachableBuilder cb = new CachableBuilder().fsPath(fs, path).conf(conf)
        .cryptoService(CryptoServiceFactory.newInstance(siteConfig, ClassloaderType.JAVA));
    Reader iter = new RFile.Reader(cb);
    MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();
    if (opts.vis || opts.hash) {
      iter.registerMetrics(vmg);
    }
    iter.printInfo(opts.printIndex);
    System.out.println();
    String propsPath = opts.getPropertiesPath();
    String[] mainArgs =
        propsPath == null ? new String[] { arg } : new String[] { "-props", propsPath, arg };
    org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(mainArgs);
    Map<String, ArrayList<ByteSequence>> localityGroupCF = null;
    if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats || opts.fullKeys
        || !StringUtils.isEmpty(opts.formatterClazz)) {
      localityGroupCF = iter.getLocalityGroupCF();
      FileSKVIterator dataIter;
      if (opts.useSample) {
        dataIter = iter.getSample();
        if (dataIter == null) {
          System.out.println("ERROR : This rfile has no sample data");
          return;
        }
      } else {
        dataIter = iter;
      }
      if (opts.keyStats) {
        FileSKVIterator indexIter = iter.getIndex();
        while (indexIter.hasTop()) {
          indexKeyStats.add(indexIter.getTopKey());
          indexIter.next();
        }
      }
      BiFunction<Key, Value, String> formatter = null;
      if (opts.formatterClazz != null) {
        final Class<? extends BiFunction<Key, Value, String>> formatterClass =
            getFormatter(opts.formatterClazz);
        formatter = formatterClass.getConstructor().newInstance();
      } else if (opts.fullKeys) {
        formatter = (key, value) -> key.toStringNoTruncate() + " -> " + value;
      } else if (opts.dump) {
        formatter = (key, value) -> key + " -> " + value;
      }
      for (String lgName : localityGroupCF.keySet()) {
        LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF);
        while (dataIter.hasTop()) {
          Key key = dataIter.getTopKey();
          Value value = dataIter.getTopValue();
          if (formatter != null) {
            System.out.println(formatter.apply(key, value));
            if (System.out.checkError()) {
              return;
            }
          }
          if (opts.histogram) {
            kvHistogram.add(key.getSize() + value.getSize());
          }
          if (opts.keyStats) {
            dataKeyStats.add(key);
          }
          dataIter.next();
        }
      }
    }
    if (opts.printSummary) {
      SummaryReader.print(iter, System.out);
    }
    iter.close();
    if (opts.vis || opts.hash) {
      System.out.println();
      vmg.printMetrics(opts.hash, "Visibility", System.out);
    }
    if (opts.histogram) {
      System.out.println();
      kvHistogram.print("");
    }
    if (opts.keyStats) {
      System.out.println();
      System.out.println("Statistics for keys in data :");
      dataKeyStats.print("\t");
      System.out.println();
      System.out.println("Statistics for keys in index :");
      indexKeyStats.print("\t");
    }
    // If the output stream has closed, there is no reason to keep going.
    if (System.out.checkError()) {
      return;
    }
  }
}
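The --formatter path above accepts any class implementing BiFunction<Key, Value, String> that has a no-argument constructor, since the class is instantiated via formatterClass.getConstructor().newInstance(). A minimal sketch of such a formatter, with a hypothetical class name:

// Hypothetical formatter for rfile-info; any BiFunction<Key, Value, String>
// with a no-arg constructor can be supplied via the --formatter option.
public class RowOnlyFormatter implements BiFunction<Key, Value, String> {
  @Override
  public String apply(Key key, Value value) {
    // print only the row portion of each key, ignoring the value
    return key.getRow().toString();
  }
}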
Use of org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder in project accumulo by apache.
From the class SplitLarge, method execute:
@Override
public void execute(String[] args) throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Opts opts = new Opts();
  opts.parseArgs("accumulo split-large", args);
  for (String file : opts.files) {
    AccumuloConfiguration aconf = opts.getSiteConfiguration();
    CryptoService cryptoService =
        CryptoServiceFactory.newInstance(aconf, CryptoServiceFactory.ClassloaderType.JAVA);
    Path path = new Path(file);
    CachableBuilder cb =
        new CachableBuilder().fsPath(fs, path).conf(conf).cryptoService(cryptoService);
    try (Reader iter = new RFile.Reader(cb)) {
      if (!file.endsWith(".rf")) {
        throw new IllegalArgumentException("File must end with .rf");
      }
      String smallName = file.substring(0, file.length() - 3) + "_small.rf";
      String largeName = file.substring(0, file.length() - 3) + "_large.rf";
      int blockSize = (int) aconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE);
      try (
          Writer small = new RFile.Writer(
              new BCFile.Writer(fs.create(new Path(smallName)), null, "gz", conf, cryptoService),
              blockSize);
          Writer large = new RFile.Writer(
              new BCFile.Writer(fs.create(new Path(largeName)), null, "gz", conf, cryptoService),
              blockSize)) {
        small.startDefaultLocalityGroup();
        large.startDefaultLocalityGroup();
        iter.seek(new Range(), new ArrayList<>(), false);
        while (iter.hasTop()) {
          Key key = iter.getTopKey();
          Value value = iter.getTopValue();
          if (key.getSize() + value.getSize() < opts.maxSize) {
            small.append(key, value);
          } else {
            large.append(key, value);
          }
          iter.next();
        }
      }
    }
  }
}
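The write side of this utility pairs a BCFile.Writer with an RFile.Writer, and a locality group must be started before any key/value pairs are appended. Distilled to its essentials (hypothetical output path; fs, conf, cryptoService, and blockSize assumed in scope as in the snippet above; "gz" codec as above):

// Sketch of the write pattern used above: BCFile.Writer wrapped by
// RFile.Writer, default locality group started before appending.
Path out = new Path("/tmp/example_small.rf"); // hypothetical
try (Writer writer = new RFile.Writer(
    new BCFile.Writer(fs.create(out), null, "gz", conf, cryptoService), blockSize)) {
  writer.startDefaultLocalityGroup();
  // keys must be appended in sorted order; UTF_8 is java.nio.charset.StandardCharsets.UTF_8
  writer.append(new Key("row1"), new Value("v1".getBytes(UTF_8)));
}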