Use of org.apache.accumulo.core.file.rfile.RFile.Reader in the Apache Accumulo project.
From the class PrintInfo, method execute.
/**
 * Prints information about every RFile named in the command line arguments.
 *
 * <p>
 * For each file this prints the reader's own info (optionally including the index), then the
 * output of the BCFile {@code PrintInfo} tool, and then, depending on the parsed options, a dump
 * of the key/value pairs, visibility metrics, a key/value size histogram, key statistics and
 * summary data.
 *
 * <p>
 * The histogram and the key statistics objects are created once, before the loop over files, so
 * the figures printed after each file also contain the entries of the files read before it.
 *
 * <p>
 * The RFile reader opened for a file is always closed before moving on, including when the
 * method returns early or an exception is thrown while the file is being read.
 *
 * @param args
 *          command line arguments; parsed into {@code Opts}
 * @throws Exception
 *           propagated from argument parsing, the Hadoop file system or the RFile reader
 */
@Override
public void execute(final String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs("accumulo rfile-info", args);
  if (opts.files.isEmpty()) {
    System.err.println("No files were given");
    System.exit(-1);
  }

  Configuration conf = new Configuration();
  for (String confFile : opts.configFiles) {
    log.debug("Adding Hadoop configuration file {}", confFile);
    conf.addResource(new Path(confFile));
  }

  FileSystem hadoopFs = FileSystem.get(conf);
  FileSystem localFs = FileSystem.getLocal(conf);

  // Shared across all files: these accumulate over the whole run.
  LogHistogram kvHistogram = new LogHistogram();
  KeyStats dataKeyStats = new KeyStats();
  KeyStats indexKeyStats = new KeyStats();

  for (String arg : opts.files) {
    Path path = new Path(arg);
    FileSystem fs;
    if (arg.contains(":")) {
      // Argument contains a ':' and is treated as a URI; let the path pick its file system.
      fs = path.getFileSystem(conf);
    } else {
      log.warn("Attempting to find file across filesystems. Consider providing URI instead of path");
      // fall back to local
      fs = hadoopFs.exists(path) ? hadoopFs : localFs;
    }
    System.out.println("Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());

    CachableBlockFile.Reader blockReader = new CachableBlockFile.Reader(fs, path, conf, null, null, SiteConfiguration.getInstance());
    Reader iter = new RFile.Reader(blockReader);
    MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();

    // try/finally guarantees the reader is closed on the early returns below and on exceptions.
    try {
      if (opts.vis || opts.hash) {
        iter.registerMetrics(vmg);
      }
      iter.printInfo(opts.printIndex);
      System.out.println();
      org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(new String[] { arg });

      if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats) {
        Map<String, ArrayList<ByteSequence>> localityGroupCF = iter.getLocalityGroupCF();

        FileSKVIterator dataIter;
        if (opts.useSample) {
          dataIter = iter.getSample();
          if (dataIter == null) {
            System.out.println("ERROR : This rfile has no sample data");
            return;
          }
        } else {
          dataIter = iter;
        }

        if (opts.keyStats) {
          FileSKVIterator indexIter = iter.getIndex();
          while (indexIter.hasTop()) {
            indexKeyStats.add(indexIter.getTopKey());
            indexIter.next();
          }
        }

        // Walk every locality group over the full range and feed the requested outputs.
        for (String lgName : localityGroupCF.keySet()) {
          LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF);
          while (dataIter.hasTop()) {
            Key key = dataIter.getTopKey();
            Value value = dataIter.getTopValue();
            if (opts.dump) {
              System.out.println(key + " -> " + value);
              // Stop as soon as stdout reports an error (e.g. the consumer went away).
              if (System.out.checkError()) {
                return;
              }
            }
            if (opts.histogram) {
              kvHistogram.add(key.getSize() + value.getSize());
            }
            if (opts.keyStats) {
              dataKeyStats.add(key);
            }
            dataIter.next();
          }
        }
      }

      if (opts.printSummary) {
        SummaryReader.print(iter, System.out);
      }
    } finally {
      iter.close();
    }

    // Reports are printed after the reader has been closed, as before.
    if (opts.vis || opts.hash) {
      System.out.println();
      vmg.printMetrics(opts.hash, "Visibility", System.out);
    }
    if (opts.histogram) {
      System.out.println();
      kvHistogram.print("");
    }
    if (opts.keyStats) {
      System.out.println();
      System.out.println("Statistics for keys in data :");
      dataKeyStats.print("\t");
      System.out.println();
      System.out.println("Statistics for keys in index :");
      indexKeyStats.print("\t");
    }

    // If the output stream has closed, there is no reason to keep going.
    if (System.out.checkError()) {
      return;
    }
  }
}
Aggregations