Search in sources:

Example 1 with CounterSummary

use of org.apache.accumulo.core.client.summary.CounterSummary in project accumulo by apache.

the class RFileTest method checkSummaries.

/**
 * Asserts that {@code summaries} holds exactly one summary, that its file statistics report
 * {@code extra} inaccurate and extra files out of a total of 2, and that it was produced by
 * {@link VisibilitySummarizer} with counters equal to {@code expected}.
 *
 * @param summaries the summaries to verify; exactly one element is expected
 * @param expected the counters the single summary must contain
 * @param extra the value expected for both the inaccurate and the extra file statistics
 */
private void checkSummaries(Collection<Summary> summaries, Map<String, Long> expected, int extra) {
    Assert.assertEquals(1, summaries.size());
    final String visibilityName = VisibilitySummarizer.class.getName();
    for (Summary current : summaries) {
        Assert.assertEquals(extra, current.getFileStatistics().getInaccurate());
        Assert.assertEquals(extra, current.getFileStatistics().getExtra());
        Assert.assertEquals(2, current.getFileStatistics().getTotal());
        String summarizerName = current.getSummarizerConfiguration().getClassName();
        CounterSummary wrapped = new CounterSummary(current);
        if (!summarizerName.equals(visibilityName)) {
            // Assert.fail always throws, so the counter comparison below is never reached.
            Assert.fail("Unexpected classname " + summarizerName);
        }
        Map<String, Long> actual = wrapped.getCounters();
        Assert.assertEquals(expected, actual);
    }
}
Also used : CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Summary(org.apache.accumulo.core.client.summary.Summary) VisibilitySummarizer(org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer)

Example 2 with CounterSummary

use of org.apache.accumulo.core.client.summary.CounterSummary in project accumulo by apache.

the class CountingSummarizerTest method testSummarizing.

/**
 * Feeds a {@link FamilySummarizer} collector, configured with {@code MAX_COUNTERS_OPT} of 5 and
 * {@code MAX_COUNTER_LEN_OPT} of 10, fifteen keys plus one deleted key, then checks both the raw
 * statistics emitted by the collector and the view of them exposed by {@link CounterSummary}.
 */
@Test
public void testSummarizing() {
    SummarizerConfiguration sc = SummarizerConfiguration.builder(FamilySummarizer.class).addOptions(MAX_COUNTERS_OPT, "5", MAX_COUNTER_LEN_OPT, "10").build();
    FamilySummarizer countSum = new FamilySummarizer();
    Value val = new Value("abc");
    Summarizer.Collector collector = countSum.collector(sc);
    for (String fam : Arrays.asList("f1", "f1", "f1", "f2", "f1", "f70000000000000000000", "f70000000000000000001", "f2", "f3", "f4", "f5", "f6", "f7", "f3", "f7")) {
        collector.accept(new Key("r", fam), val);
    }
    // One additional key flagged as deleted; the expectations below count it as seen and as an
    // ignored delete, but not as emitted.
    Key dk = new Key("r", "f2");
    dk.setDeleted(true);
    collector.accept(dk, new Value(""));
    HashMap<String, Long> stats = new HashMap<>();
    collector.summarize(stats::put);
    String p = COUNTER_STAT_PREFIX;
    HashMap<String, Long> expected = new HashMap<>();
    expected.put(p + "f1", 4L);
    expected.put(p + "f2", 2L);
    expected.put(p + "f3", 2L);
    expected.put(p + "f4", 1L);
    expected.put(p + "f5", 1L);
    expected.put(TOO_LONG_STAT, 2L);
    expected.put(TOO_MANY_STAT, 3L);
    expected.put(SEEN_STAT, 16L);
    expected.put(EMITTED_STAT, 15L);
    expected.put(DELETES_IGNORED_STAT, 1L);
    Assert.assertEquals(expected, stats);
    CounterSummary csum = new CounterSummary(stats);
    // Expected ignored count equals tooMany (3) + tooLong (2).
    Assert.assertEquals(5, csum.getIgnored());
    Assert.assertEquals(3, csum.getTooMany());
    Assert.assertEquals(2, csum.getTooLong());
    Assert.assertEquals(16, csum.getSeen());
    Assert.assertEquals(15, csum.getEmitted());
    Assert.assertEquals(1, csum.getDeletesIgnored());
    // getCounters() is expected to expose the counters without the statistic prefix.
    expected.clear();
    expected.put("f1", 4L);
    expected.put("f2", 2L);
    expected.put("f3", 2L);
    expected.put("f4", 1L);
    expected.put("f5", 1L);
    Assert.assertEquals(expected, csum.getCounters());
}
Also used : Collector(org.apache.accumulo.core.client.summary.Summarizer.Collector) HashMap(java.util.HashMap) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Value(org.apache.accumulo.core.data.Value) VisibilitySummarizer(org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer) CountingSummarizer(org.apache.accumulo.core.client.summary.CountingSummarizer) Summarizer(org.apache.accumulo.core.client.summary.Summarizer) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 3 with CounterSummary

use of org.apache.accumulo.core.client.summary.CounterSummary in project accumulo by apache.

the class RFileTest method testSummaries.

/**
 * Writes two RFiles with {@link VisibilitySummarizer} and {@link FamilySummarizer} enabled and
 * verifies the summaries read back from one file, from both files, from a selected summarizer
 * only, and for a series of start-row / end-row boundary conditions. Also checks that the data
 * written alongside the summaries can still be scanned normally.
 *
 * @throws Exception if creating, writing or reading the test files fails
 */
@Test
public void testSummaries() throws Exception {
    SummarizerConfiguration sc1 = SummarizerConfiguration.builder(VisibilitySummarizer.class).build();
    SummarizerConfiguration sc2 = SummarizerConfiguration.builder(FamilySummarizer.class).build();
    LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
    String testFile = createTmpTestFile();
    SortedMap<Key, Value> testData1 = createTestData(0, 100, 0, 4, 1, "A&B", "A&B&C");
    // try-with-resources guarantees the writer is closed even if append fails
    try (RFileWriter writer = RFile.newWriter().to(testFile).withFileSystem(localFs).withSummarizers(sc1, sc2).build()) {
        writer.append(testData1.entrySet());
    }
    // verify summary data
    Collection<Summary> summaries = RFile.summaries().from(testFile).withFileSystem(localFs).read();
    assertFamilyAndVisibilityCounters(summaries, 1, ImmutableMap.of("0000", 200L, "0001", 200L, "0002", 200L, "0003", 200L), ImmutableMap.of("A&B", 400L, "A&B&C", 400L));
    // check if writing summary data impacted normal rfile functionality
    Scanner scanner = RFile.newScanner().from(testFile).withFileSystem(localFs).withAuthorizations(new Authorizations("A", "B", "C")).build();
    try {
        Assert.assertEquals(testData1, toMap(scanner));
    } finally {
        scanner.close();
    }
    String testFile2 = createTmpTestFile();
    SortedMap<Key, Value> testData2 = createTestData(100, 100, 0, 4, 1, "A&B", "A&B&C");
    try (RFileWriter writer = RFile.newWriter().to(testFile2).withFileSystem(localFs).withSummarizers(sc1, sc2).build()) {
        writer.append(testData2.entrySet());
    }
    // verify reading summaries from multiple files works
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).read();
    assertFamilyAndVisibilityCounters(summaries, 2, ImmutableMap.of("0000", 400L, "0001", 400L, "0002", 400L, "0003", 400L), ImmutableMap.of("A&B", 800L, "A&B&C", 800L));
    // verify reading a subset of summaries works
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 0);
    // the following tests check boundary conditions for start row and end row
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(99)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 400L, "A&B&C", 400L), 0);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(98)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 1);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(0)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 1);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow("#").read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 0);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(100)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 400L, "A&B&C", 400L), 1);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).endRow(rowStr(99)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 400L, "A&B&C", 400L), 0);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).endRow(rowStr(100)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 1);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).endRow(rowStr(199)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 0);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(50)).endRow(rowStr(150)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 2);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(120)).endRow(rowStr(150)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 400L, "A&B&C", 400L), 1);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(50)).endRow(rowStr(199)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 1);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow("#").endRow(rowStr(150)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 800L, "A&B&C", 800L), 1);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(199)).read();
    checkSummaries(summaries, ImmutableMap.of(), 0);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).startRow(rowStr(200)).read();
    checkSummaries(summaries, ImmutableMap.of(), 0);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).endRow("#").read();
    checkSummaries(summaries, ImmutableMap.of(), 0);
    summaries = RFile.summaries().from(testFile, testFile2).withFileSystem(localFs).selectSummaries(sc -> sc.equals(sc1)).endRow(rowStr(0)).read();
    checkSummaries(summaries, ImmutableMap.of("A&B", 400L, "A&B&C", 400L), 1);
}

/**
 * Asserts that {@code summaries} holds exactly two summaries, one per summarizer, each with no
 * inaccurate files and the given total file count, and that the counters match per summarizer.
 *
 * @param summaries the summaries to verify; exactly two elements are expected
 * @param expectedTotal the expected total file count in each summary's file statistics
 * @param expectedFamilies counters expected from the {@link FamilySummarizer} summary
 * @param expectedVisibilities counters expected from the {@link VisibilitySummarizer} summary
 */
private void assertFamilyAndVisibilityCounters(Collection<Summary> summaries, long expectedTotal, Map<String, Long> expectedFamilies, Map<String, Long> expectedVisibilities) {
    Assert.assertEquals(2, summaries.size());
    for (Summary summary : summaries) {
        Assert.assertEquals(0, summary.getFileStatistics().getInaccurate());
        Assert.assertEquals(expectedTotal, summary.getFileStatistics().getTotal());
        String className = summary.getSummarizerConfiguration().getClassName();
        CounterSummary counterSummary = new CounterSummary(summary);
        if (className.equals(FamilySummarizer.class.getName())) {
            Assert.assertEquals(expectedFamilies, counterSummary.getCounters());
        } else if (className.equals(VisibilitySummarizer.class.getName())) {
            Assert.assertEquals(expectedVisibilities, counterSummary.getCounters());
        } else {
            Assert.fail("Unexpected classname " + className);
        }
    }
}
Also used : ByteSequence(org.apache.accumulo.core.data.ByteSequence) Arrays(java.util.Arrays) VisibilitySummarizer(org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) Text(org.apache.hadoop.io.Text) HashMap(java.util.HashMap) Random(java.util.Random) ArrayByteSequence(org.apache.accumulo.core.data.ArrayByteSequence) ArrayList(java.util.ArrayList) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) RowSampler(org.apache.accumulo.core.client.sample.RowSampler) FileOperations(org.apache.accumulo.core.file.FileOperations) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key) Configuration(org.apache.hadoop.conf.Configuration) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Value(org.apache.accumulo.core.data.Value) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) Property(org.apache.accumulo.core.conf.Property) Summary(org.apache.accumulo.core.client.summary.Summary) Iterator(java.util.Iterator) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) IOException(java.io.IOException) Test(org.junit.Test) Authorizations(org.apache.accumulo.core.security.Authorizations) File(java.io.File) DefaultConfiguration(org.apache.accumulo.core.conf.DefaultConfiguration) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) AbstractMap(java.util.AbstractMap) List(java.util.List) TreeMap(java.util.TreeMap) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) RegExFilter(org.apache.accumulo.core.iterators.user.RegExFilter) Entry(java.util.Map.Entry) Assert(org.junit.Assert) Collections(java.util.Collections) 
LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) SortedMap(java.util.SortedMap) Scanner(org.apache.accumulo.core.client.Scanner) Scanner(org.apache.accumulo.core.client.Scanner) Authorizations(org.apache.accumulo.core.security.Authorizations) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) DefaultConfiguration(org.apache.accumulo.core.conf.DefaultConfiguration) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) VisibilitySummarizer(org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Value(org.apache.accumulo.core.data.Value) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Summary(org.apache.accumulo.core.client.summary.Summary) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) AbstractMap(java.util.AbstractMap) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 4 with CounterSummary

use of org.apache.accumulo.core.client.summary.CounterSummary in project accumulo by apache.

the class CountingSummarizerTest method testMultipleEmit.

/**
 * Feeds a {@code MultiSummarizer} collector every combination of four rows, four families and
 * four qualifiers (64 keys) and checks that the resulting {@link CounterSummary} reports 64 keys
 * seen, three counters emitted per key, nothing ignored, and counters keyed by the
 * two-character prefix of each row ("rp:"), family ("fp:") and qualifier ("qp:").
 */
@Test
public void testMultipleEmit() {
    SummarizerConfiguration sc = SummarizerConfiguration.builder(MultiSummarizer.class).build();
    MultiSummarizer countSum = new MultiSummarizer();
    Summarizer.Collector collector = countSum.collector(sc);
    Value val = new Value("abc");
    HashMap<String, Long> expected = new HashMap<>();
    for (String row : new String[] { "ask", "asleep", "some", "soul" }) {
        for (String fam : new String[] { "hop", "hope", "nope", "noop" }) {
            for (String qual : new String[] { "mad", "lad", "lab", "map" }) {
                collector.accept(new Key(row, fam, qual), val);
                // Build the expected counters in lock-step with the keys handed to the collector.
                expected.merge("rp:" + row.substring(0, 2), 1L, Long::sum);
                expected.merge("fp:" + fam.substring(0, 2), 1L, Long::sum);
                expected.merge("qp:" + qual.substring(0, 2), 1L, Long::sum);
            }
        }
    }
    HashMap<String, Long> stats = new HashMap<>();
    collector.summarize(stats::put);
    CounterSummary csum = new CounterSummary(stats);
    Assert.assertEquals(expected, csum.getCounters());
    Assert.assertEquals(64, csum.getSeen());
    Assert.assertEquals(3 * 64, csum.getEmitted());
    Assert.assertEquals(0, csum.getIgnored());
    Assert.assertEquals(0, csum.getDeletesIgnored());
}
Also used : Collector(org.apache.accumulo.core.client.summary.Summarizer.Collector) HashMap(java.util.HashMap) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Value(org.apache.accumulo.core.data.Value) VisibilitySummarizer(org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer) CountingSummarizer(org.apache.accumulo.core.client.summary.CountingSummarizer) Summarizer(org.apache.accumulo.core.client.summary.Summarizer) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 5 with CounterSummary

use of org.apache.accumulo.core.client.summary.CounterSummary in project accumulo by apache.

the class CountingSummarizerTest method testCountDeletes.

/**
 * Configures a {@link FamilySummarizer} with {@code INGNORE_DELETES_OPT} set to "false" and
 * checks that a key flagged as deleted is counted like any other key: three keys seen, three
 * emitted, and no deletes ignored.
 */
@Test
public void testCountDeletes() {
    SummarizerConfiguration sc = SummarizerConfiguration.builder(FamilySummarizer.class).addOptions(INGNORE_DELETES_OPT, "false").build();
    FamilySummarizer countSum = new FamilySummarizer();
    Key k1 = new Key("r1", "f1");
    // Same row and family as k1, but flagged as deleted.
    Key k2 = new Key("r1", "f1");
    k2.setDeleted(true);
    Key k3 = new Key("r1", "f2");
    Collector collector = countSum.collector(sc);
    collector.accept(k1, new Value(""));
    collector.accept(k2, new Value(""));
    collector.accept(k3, new Value(""));
    String p = COUNTER_STAT_PREFIX;
    HashMap<String, Long> expected = new HashMap<>();
    expected.put(p + "f1", 2L);
    expected.put(p + "f2", 1L);
    expected.put(TOO_LONG_STAT, 0L);
    expected.put(TOO_MANY_STAT, 0L);
    expected.put(SEEN_STAT, 3L);
    expected.put(EMITTED_STAT, 3L);
    expected.put(DELETES_IGNORED_STAT, 0L);
    HashMap<String, Long> stats = new HashMap<>();
    collector.summarize(stats::put);
    Assert.assertEquals(expected, stats);
    CounterSummary csum = new CounterSummary(stats);
    Assert.assertEquals(0, csum.getIgnored());
    Assert.assertEquals(0, csum.getTooMany());
    Assert.assertEquals(0, csum.getTooLong());
    Assert.assertEquals(3, csum.getSeen());
    Assert.assertEquals(3, csum.getEmitted());
    Assert.assertEquals(0, csum.getDeletesIgnored());
    // getCounters() is expected to expose the counters without the statistic prefix.
    expected.clear();
    expected.put("f1", 2L);
    expected.put("f2", 1L);
    Assert.assertEquals(expected, csum.getCounters());
}
Also used : HashMap(java.util.HashMap) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Collector(org.apache.accumulo.core.client.summary.Summarizer.Collector) Value(org.apache.accumulo.core.data.Value) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) FamilySummarizer(org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

CounterSummary (org.apache.accumulo.core.client.summary.CounterSummary)6 HashMap (java.util.HashMap)5 Test (org.junit.Test)5 SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)4 FamilySummarizer (org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer)4 VisibilitySummarizer (org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer)4 Key (org.apache.accumulo.core.data.Key)4 Value (org.apache.accumulo.core.data.Value)4 Collector (org.apache.accumulo.core.client.summary.Summarizer.Collector)3 Summary (org.apache.accumulo.core.client.summary.Summary)3 Random (java.util.Random)2 NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration)2 CountingSummarizer (org.apache.accumulo.core.client.summary.CountingSummarizer)2 Summarizer (org.apache.accumulo.core.client.summary.Summarizer)2 Text (org.apache.hadoop.io.Text)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 File (java.io.File)1 IOException (java.io.IOException)1 AbstractMap (java.util.AbstractMap)1 ArrayList (java.util.ArrayList)1