Search in sources :

Example 6 with Reader

Use of org.apache.accumulo.core.file.rfile.RFile.Reader in the Apache Accumulo project.

The execute method of class PrintInfo.

/**
 * Prints information about every file named in {@code opts.files}.
 *
 * <p>For each file this prints the qualified path, the reader's own info (optionally with the
 * index), and the output of the bcfile {@code PrintInfo} tool. Depending on the parsed options
 * it then scans the data (or the sample data) of every locality group to dump key/value pairs,
 * feed a key+value size histogram, gather key statistics and visibility metrics, and finally
 * prints the requested summaries.
 *
 * <p>The histogram and key-statistics accumulators are created once, before the file loop, so
 * their printed values cover all files processed so far rather than only the current one.
 *
 * <p>The reader opened for a file is always closed before moving on, including when the method
 * returns early (no sample data, or standard out reports an error) or an exception is thrown.
 *
 * @param args command line arguments, parsed with {@code Opts.parseArgs}
 * @throws Exception if parsing, file system access, or reading a file fails
 */
@Override
public void execute(final String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs("accumulo rfile-info", args);
    if (opts.files.isEmpty()) {
        System.err.println("No files were given");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    for (String confFile : opts.configFiles) {
        log.debug("Adding Hadoop configuration file {}", confFile);
        conf.addResource(new Path(confFile));
    }
    FileSystem hadoopFs = FileSystem.get(conf);
    FileSystem localFs = FileSystem.getLocal(conf);
    // These accumulators are intentionally shared across all files given on the command line.
    LogHistogram kvHistogram = new LogHistogram();
    KeyStats dataKeyStats = new KeyStats();
    KeyStats indexKeyStats = new KeyStats();
    for (String arg : opts.files) {
        Path path = new Path(arg);
        FileSystem fs;
        if (arg.contains(":")) {
            // The argument looks like a URI, so let the path pick its own file system.
            fs = path.getFileSystem(conf);
        } else {
            log.warn("Attempting to find file across filesystems. Consider providing URI instead of path");
            // fall back to local
            fs = hadoopFs.exists(path) ? hadoopFs : localFs;
        }
        System.out.println("Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());
        // Created before the reader is opened so that nothing can fail between opening the
        // reader and entering the try block that guarantees it is closed.
        MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();
        CachableBlockFile.Reader _rdr = new CachableBlockFile.Reader(fs, path, conf, null, null, SiteConfiguration.getInstance());
        Reader iter = new RFile.Reader(_rdr);
        try {
            if (opts.vis || opts.hash) {
                iter.registerMetrics(vmg);
            }
            iter.printInfo(opts.printIndex);
            System.out.println();
            org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(new String[] { arg });
            if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats) {
                Map<String, ArrayList<ByteSequence>> localityGroupCF = iter.getLocalityGroupCF();
                FileSKVIterator dataIter;
                if (opts.useSample) {
                    dataIter = iter.getSample();
                    if (dataIter == null) {
                        System.out.println("ERROR : This rfile has no sample data");
                        // The finally block below closes the reader.
                        return;
                    }
                } else {
                    dataIter = iter;
                }
                if (opts.keyStats) {
                    FileSKVIterator indexIter = iter.getIndex();
                    while (indexIter.hasTop()) {
                        indexKeyStats.add(indexIter.getTopKey());
                        indexIter.next();
                    }
                }
                for (String lgName : localityGroupCF.keySet()) {
                    LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF);
                    while (dataIter.hasTop()) {
                        Key key = dataIter.getTopKey();
                        Value value = dataIter.getTopValue();
                        if (opts.dump) {
                            System.out.println(key + " -> " + value);
                            // Stop as soon as standard out reports an error; the finally
                            // block below closes the reader.
                            if (System.out.checkError()) {
                                return;
                            }
                        }
                        if (opts.histogram) {
                            kvHistogram.add(key.getSize() + value.getSize());
                        }
                        if (opts.keyStats) {
                            dataKeyStats.add(key);
                        }
                        dataIter.next();
                    }
                }
            }
            if (opts.printSummary) {
                SummaryReader.print(iter, System.out);
            }
        } finally {
            // Always release the reader, including on early return or exception.
            iter.close();
        }
        if (opts.vis || opts.hash) {
            System.out.println();
            vmg.printMetrics(opts.hash, "Visibility", System.out);
        }
        if (opts.histogram) {
            System.out.println();
            kvHistogram.print("");
        }
        if (opts.keyStats) {
            System.out.println();
            System.out.println("Statistics for keys in data :");
            dataKeyStats.print("\t");
            System.out.println();
            System.out.println("Statistics for keys in index :");
            indexKeyStats.print("\t");
        }
        // If the output stream has closed, there is no reason to keep going.
        if (System.out.checkError()) {
            return;
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) SiteConfiguration(org.apache.accumulo.core.conf.SiteConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) SummaryReader(org.apache.accumulo.core.summary.SummaryReader) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) Range(org.apache.accumulo.core.data.Range) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) CachableBlockFile(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)

Aggregations

Reader (org.apache.accumulo.core.file.rfile.RFile.Reader)6 Key (org.apache.accumulo.core.data.Key)5 Value (org.apache.accumulo.core.data.Value)5 Configuration (org.apache.hadoop.conf.Configuration)4 ArrayList (java.util.ArrayList)3 DefaultConfiguration (org.apache.accumulo.core.conf.DefaultConfiguration)3 Range (org.apache.accumulo.core.data.Range)3 CachableBlockFile (org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile)3 Scanner (org.apache.accumulo.core.client.Scanner)2 NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration)2 SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration)2 SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)2 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)2 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)2 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)2 Path (org.apache.hadoop.fs.Path)2 Test (org.junit.Test)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1