Search in sources :

Example 6 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class EntryLengthSummarizersTest method testAll.

/**
 * Runs six fully populated entries (row, family, qualifier, visibility and value all set, apart
 * from one empty value) through a single collector and verifies every statistic it reports.
 */
@Test
public void testAll() {
    SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
    Collector lengths = new EntryLengthSummarizer().collector(config);

    // Feed the entries straight into the collector; the fifth one carries an empty value.
    lengths.accept(new Key("maximumnoqualifier", "f1", "q", "vis1"), new Value("v1"));
    lengths.accept(new Key("minKey", "fam2", "q2", "visibility2"), new Value("value2"));
    lengths.accept(new Key("row3", "f3", "qualifier3", "v3"), new Value("val3"));
    lengths.accept(new Key("r4", "family4", "qual4", "vis4"), new Value("fourthvalue"));
    lengths.accept(new Key("fifthrow", "thirdfamily", "q5", "v5"), new Value(""));
    lengths.accept(new Key("r6", "sixthfamily", "qual6", "visibi6"), new Value("value6"));

    HashMap<String, Long> reported = new HashMap<>();
    lengths.summarize(reported::put);

    HashMap<String, Long> wanted = new HashMap<>();

    // Whole key (row + family + qualifier + visibility): 25, 23, 18, 18, 23, 25
    wanted.put("key.min", 18L);
    wanted.put("key.max", 25L);
    wanted.put("key.sum", 132L);
    // log2 histogram of the key lengths
    wanted.put("key.logHist.4", 2L);
    wanted.put("key.logHist.5", 4L);

    // Row lengths: 18, 6, 4, 2, 8, 2
    wanted.put("row.min", 2L);
    wanted.put("row.max", 18L);
    wanted.put("row.sum", 40L);
    // log2 histogram of the row lengths
    wanted.put("row.logHist.1", 2L);
    wanted.put("row.logHist.2", 1L);
    wanted.put("row.logHist.3", 2L);
    wanted.put("row.logHist.4", 1L);

    // Family lengths: 2, 4, 2, 7, 11, 11
    wanted.put("family.min", 2L);
    wanted.put("family.max", 11L);
    wanted.put("family.sum", 37L);
    // log2 histogram of the family lengths
    wanted.put("family.logHist.1", 2L);
    wanted.put("family.logHist.2", 1L);
    wanted.put("family.logHist.3", 3L);

    // Qualifier lengths: 1, 2, 10, 5, 2, 5
    wanted.put("qualifier.min", 1L);
    wanted.put("qualifier.max", 10L);
    wanted.put("qualifier.sum", 25L);
    // log2 histogram of the qualifier lengths
    wanted.put("qualifier.logHist.0", 1L);
    wanted.put("qualifier.logHist.1", 2L);
    wanted.put("qualifier.logHist.2", 2L);
    wanted.put("qualifier.logHist.3", 1L);

    // Visibility lengths: 4, 11, 2, 4, 2, 7
    wanted.put("visibility.min", 2L);
    wanted.put("visibility.max", 11L);
    wanted.put("visibility.sum", 30L);
    // log2 histogram of the visibility lengths
    wanted.put("visibility.logHist.1", 2L);
    wanted.put("visibility.logHist.2", 2L);
    wanted.put("visibility.logHist.3", 2L);

    // Value lengths: 2, 6, 4, 11, 0, 6
    wanted.put("value.min", 0L);
    wanted.put("value.max", 11L);
    wanted.put("value.sum", 29L);
    // log2 histogram of the value lengths
    wanted.put("value.logHist.0", 1L);
    wanted.put("value.logHist.1", 1L);
    wanted.put("value.logHist.2", 1L);
    wanted.put("value.logHist.3", 3L);

    // One count per accepted entry.
    wanted.put("total", 6L);

    Assert.assertEquals(wanted, reported);
}
Also used : HashMap(java.util.HashMap) EntryLengthSummarizer(org.apache.accumulo.core.client.summary.summarizers.EntryLengthSummarizer) Collector(org.apache.accumulo.core.client.summary.Summarizer.Collector) Value(org.apache.accumulo.core.data.Value) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 7 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class EntryLengthSummarizersTest method testBasicRow.

/**
 * Verifies the reported statistics when each entry has only a two-character row: the family,
 * qualifier, visibility and value are all empty, so their statistics are expected to be zero.
 */
@Test
public void testBasicRow() {
    SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
    Collector lengths = new EntryLengthSummarizer().collector(config);

    // Three row-only keys, each paired with an empty value.
    lengths.accept(new Key("r1"), new Value(""));
    lengths.accept(new Key("r2"), new Value(""));
    lengths.accept(new Key("r3"), new Value(""));

    HashMap<String, Long> reported = new HashMap<>();
    lengths.summarize(reported::put);

    HashMap<String, Long> wanted = new HashMap<>();

    // The key consists of the row alone, so key and row statistics coincide (length 2 each).
    wanted.put("key.min", 2L);
    wanted.put("key.max", 2L);
    wanted.put("key.sum", 6L);
    // log2 histogram of the key lengths
    wanted.put("key.logHist.1", 3L);

    wanted.put("row.min", 2L);
    wanted.put("row.max", 2L);
    wanted.put("row.sum", 6L);
    // log2 histogram of the row lengths
    wanted.put("row.logHist.1", 3L);

    // Every remaining field is empty: min, max and sum are 0 and all three entries
    // are counted in histogram bucket 0.
    wanted.put("family.min", 0L);
    wanted.put("family.max", 0L);
    wanted.put("family.sum", 0L);
    // log2 histogram of the family lengths
    wanted.put("family.logHist.0", 3L);

    wanted.put("qualifier.min", 0L);
    wanted.put("qualifier.max", 0L);
    wanted.put("qualifier.sum", 0L);
    // log2 histogram of the qualifier lengths
    wanted.put("qualifier.logHist.0", 3L);

    wanted.put("visibility.min", 0L);
    wanted.put("visibility.max", 0L);
    wanted.put("visibility.sum", 0L);
    // log2 histogram of the visibility lengths
    wanted.put("visibility.logHist.0", 3L);

    wanted.put("value.min", 0L);
    wanted.put("value.max", 0L);
    wanted.put("value.sum", 0L);
    // log2 histogram of the value lengths
    wanted.put("value.logHist.0", 3L);

    // One count per accepted entry.
    wanted.put("total", 3L);

    Assert.assertEquals(wanted, reported);
}
Also used : HashMap(java.util.HashMap) EntryLengthSummarizer(org.apache.accumulo.core.client.summary.summarizers.EntryLengthSummarizer) Collector(org.apache.accumulo.core.client.summary.Summarizer.Collector) Value(org.apache.accumulo.core.data.Value) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 8 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class EntryLengthSummarizersTest method testComplexValue.

/**
 * Verifies the reported statistics for three entries whose row, family, qualifier, visibility
 * and value lengths all differ from one another.
 */
@Test
public void testComplexValue() {
    SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
    Collector lengths = new EntryLengthSummarizer().collector(config);

    lengths.accept(new Key("r1", "family1", "columnQualifier1", "v1"), new Value("v1"));
    lengths.accept(new Key("row2", "columnFamily2", "q2", "visibility2"), new Value("value2"));
    lengths.accept(new Key("columnRow3", "f3", "qualifier3", "columnVisibility3"), new Value("keyValue3"));

    HashMap<String, Long> reported = new HashMap<>();
    lengths.summarize(reported::put);

    HashMap<String, Long> wanted = new HashMap<>();

    // Whole key (row + family + qualifier + visibility): 27, 30, 39
    wanted.put("key.min", 27L);
    wanted.put("key.max", 39L);
    wanted.put("key.sum", 96L);
    // log2 histogram of the key lengths
    wanted.put("key.logHist.5", 3L);

    // Row lengths: 2, 4, 10
    wanted.put("row.min", 2L);
    wanted.put("row.max", 10L);
    wanted.put("row.sum", 16L);
    // log2 histogram of the row lengths
    wanted.put("row.logHist.1", 1L);
    wanted.put("row.logHist.2", 1L);
    wanted.put("row.logHist.3", 1L);

    // Family lengths: 7, 13, 2
    wanted.put("family.min", 2L);
    wanted.put("family.max", 13L);
    wanted.put("family.sum", 22L);
    // log2 histogram of the family lengths
    wanted.put("family.logHist.1", 1L);
    wanted.put("family.logHist.3", 1L);
    wanted.put("family.logHist.4", 1L);

    // Qualifier lengths: 16, 2, 10
    wanted.put("qualifier.min", 2L);
    wanted.put("qualifier.max", 16L);
    wanted.put("qualifier.sum", 28L);
    // log2 histogram of the qualifier lengths
    wanted.put("qualifier.logHist.1", 1L);
    wanted.put("qualifier.logHist.3", 1L);
    wanted.put("qualifier.logHist.4", 1L);

    // Visibility lengths: 2, 11, 17
    wanted.put("visibility.min", 2L);
    wanted.put("visibility.max", 17L);
    wanted.put("visibility.sum", 30L);
    // log2 histogram of the visibility lengths
    wanted.put("visibility.logHist.1", 1L);
    wanted.put("visibility.logHist.3", 1L);
    wanted.put("visibility.logHist.4", 1L);

    // Value lengths: 2, 6, 9
    wanted.put("value.min", 2L);
    wanted.put("value.max", 9L);
    wanted.put("value.sum", 17L);
    // log2 histogram of the value lengths
    wanted.put("value.logHist.1", 1L);
    wanted.put("value.logHist.3", 2L);

    // One count per accepted entry.
    wanted.put("total", 3L);

    Assert.assertEquals(wanted, reported);
}
Also used : HashMap(java.util.HashMap) EntryLengthSummarizer(org.apache.accumulo.core.client.summary.summarizers.EntryLengthSummarizer) Collector(org.apache.accumulo.core.client.summary.Summarizer.Collector) Value(org.apache.accumulo.core.data.Value) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 9 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class SummaryIT method basicSummaryTest.

/**
 * Walks a table configured with {@code BasicSummarizer} through a sequence of operations and
 * checks the retrieved summaries after each one: initial write and flush, an extra mutation with
 * a delete, full compaction, a split, compaction of the range after the split point, range-limited
 * retrieval, and finally removing and re-adding the summarizer.
 *
 * @throws Exception if any table operation, write or helper call fails
 */
@Test
public void basicSummaryTest() throws Exception {
    final String table = getUniqueNames(1)[0];
    Connector c = getConnector();
    NewTableConfiguration ntc = new NewTableConfiguration();
    SummarizerConfiguration sc1 = SummarizerConfiguration.builder(BasicSummarizer.class.getName()).build();
    ntc.enableSummarization(sc1);
    c.tableOperations().create(table, ntc);

    // Declared outside the try block because both are still needed after the writer is closed.
    Collection<Summary> summaries;
    LongSummaryStatistics stats;

    // try-with-resources guarantees the writer is closed even when one of the checks below fails.
    try (BatchWriter bw = writeData(table, c)) {
        // Retrieving without a flush is expected to find no summaries yet.
        summaries = c.tableOperations().summaries(table).flush(false).retrieve();
        Assert.assertEquals(0, summaries.size());

        stats = getTimestampStats(table, c);
        summaries = c.tableOperations().summaries(table).flush(true).retrieve();
        checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

        // Add one put and one delete, then flush so they show up in the summaries.
        Mutation m = new Mutation(String.format("r%09x", 999));
        m.put("f1", "q1", "999-0");
        m.putDelete("f1", "q2");
        bw.addMutation(m);
        bw.flush();
        c.tableOperations().flush(table, null, null, true);
        stats = getTimestampStats(table, c);
        summaries = c.tableOperations().summaries(table).retrieve();
        checkSummaries(summaries, sc1, 2, 0, 0, TOTAL_STAT, 100_002L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 1L);
    }

    // After a full compaction the expected totals are back to the pre-mutation values.
    c.tableOperations().compact(table, new CompactionConfig().setWait(true));
    summaries = c.tableOperations().summaries(table).retrieve();
    checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

    // split tablet into two
    String sp1 = String.format("r%09x", 50_000);
    addSplits(table, c, sp1);
    summaries = c.tableOperations().summaries(table).retrieve();
    checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

    // compact 2nd tablet
    c.tableOperations().compact(table, new CompactionConfig().setStartRow(new Text(sp1)).setWait(true));
    summaries = c.tableOperations().summaries(table).retrieve();
    checkSummaries(summaries, sc1, 2, 0, 1, TOTAL_STAT, 113_999L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

    // get summaries for the rows after the split point (the same range that was just compacted)
    stats = getTimestampStats(table, c, sp1, null);
    summaries = c.tableOperations().summaries(table).startRow(sp1).retrieve();
    checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 49_999L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

    // compact all tablets and regenerate all summaries
    c.tableOperations().compact(table, new CompactionConfig());
    summaries = c.tableOperations().summaries(table).retrieve();
    stats = getTimestampStats(table, c);
    checkSummaries(summaries, sc1, 2, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

    // A row range inside one tablet: exactly one summary is expected, with the single file
    // reported as "extra", so only a bound on the total is asserted.
    summaries = c.tableOperations().summaries(table).startRow(String.format("r%09x", 75_000)).endRow(String.format("r%09x", 80_000)).retrieve();
    Summary summary = Iterables.getOnlyElement(summaries);
    Assert.assertEquals(1, summary.getFileStatistics().getTotal());
    Assert.assertEquals(1, summary.getFileStatistics().getExtra());
    long total = summary.getStatistics().get(TOTAL_STAT);
    Assert.assertTrue("Total " + total + " out of expected range", total > 0 && total <= 10_000);

    // test adding and removing
    // A predicate that matches nothing must leave the configured summarizer in place.
    c.tableOperations().removeSummarizers(table, sc -> sc.getClassName().contains("foo"));
    List<SummarizerConfiguration> summarizers = c.tableOperations().listSummarizers(table);
    Assert.assertEquals(1, summarizers.size());
    Assert.assertTrue(summarizers.contains(sc1));

    c.tableOperations().removeSummarizers(table, sc -> sc.getClassName().equals(BasicSummarizer.class.getName()));
    summarizers = c.tableOperations().listSummarizers(table);
    Assert.assertEquals(0, summarizers.size());

    // With no summarizer configured, a compaction is expected to leave no summaries behind.
    c.tableOperations().compact(table, new CompactionConfig().setWait(true));
    summaries = c.tableOperations().summaries(table).retrieve();
    Assert.assertEquals(0, summaries.size());

    // Re-adding the summarizer and compacting again regenerates the summaries.
    c.tableOperations().addSummarizers(table, sc1);
    c.tableOperations().compact(table, new CompactionConfig().setWait(true));
    summaries = c.tableOperations().summaries(table).retrieve();
    checkSummaries(summaries, sc1, 2, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Text(org.apache.hadoop.io.Text) LongSummaryStatistics(java.util.LongSummaryStatistics) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) CompactionConfig(org.apache.accumulo.core.client.admin.CompactionConfig) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Summary(org.apache.accumulo.core.client.summary.Summary) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Test(org.junit.Test)

Example 10 with SummarizerConfiguration

use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.

the class SummaryIT method tooLargeTest.

/**
 * Checks how file statistics are reported for a table configured with {@code BigSummarizer}:
 * first with a single flushed file, then after splitting at row "m" and compacting.
 *
 * @throws Exception if any table operation or write fails
 */
@Test
public void tooLargeTest() throws Exception {
    final String table = getUniqueNames(1)[0];
    Connector c = getConnector();
    NewTableConfiguration ntc = new NewTableConfiguration();
    SummarizerConfiguration sc1 = SummarizerConfiguration.builder(BigSummarizer.class).build();
    ntc.enableSummarization(sc1);
    c.tableOperations().create(table, ntc);

    // Two rows that sort on opposite sides of the "m" split point added further down.
    try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
        write(bw, "a_large", "f1", "q1", "v1");
        write(bw, "v_small", "f1", "q1", "v2");
    }
    c.tableOperations().flush(table, null, null, true);

    // Single flushed file: it is reported as large and inaccurate, and no statistics come back.
    Summary summary = c.tableOperations().summaries(table).retrieve().get(0);
    Assert.assertEquals(1, summary.getFileStatistics().getLarge());
    Assert.assertEquals(0, summary.getFileStatistics().getMissing());
    Assert.assertEquals(0, summary.getFileStatistics().getExtra());
    Assert.assertEquals(0, summary.getFileStatistics().getDeleted());
    Assert.assertEquals(1, summary.getFileStatistics().getInaccurate());
    Assert.assertEquals(1, summary.getFileStatistics().getTotal());
    Assert.assertEquals(Collections.emptyMap(), summary.getStatistics());

    // create situation where one tablet has summary data and one does not because the summary data was too large
    c.tableOperations().addSplits(table, new TreeSet<>(Collections.singleton(new Text("m"))));
    c.tableOperations().compact(table, new CompactionConfig().setWait(true));

    // Two files now: one is still reported as large, and statistics are returned this time.
    summary = c.tableOperations().summaries(table).retrieve().get(0);
    Assert.assertEquals(1, summary.getFileStatistics().getLarge());
    Assert.assertEquals(0, summary.getFileStatistics().getMissing());
    Assert.assertEquals(0, summary.getFileStatistics().getExtra());
    Assert.assertEquals(0, summary.getFileStatistics().getDeleted());
    Assert.assertEquals(1, summary.getFileStatistics().getInaccurate());
    Assert.assertEquals(2, summary.getFileStatistics().getTotal());

    // Expected statistics: ten zero-padded hex keys, each mapped to 19 times its index.
    // NOTE(review): presumably this mirrors what BigSummarizer emits for the small row; confirm
    // against BigSummarizer, which is defined outside this method.
    HashMap<String, Long> expected = new HashMap<>();
    for (int i = 0; i < 10; i++) {
        expected.put(String.format("%09x", i), i * 19L);
    }
    Assert.assertEquals(expected, summary.getStatistics());
}
Also used : Connector(org.apache.accumulo.core.client.Connector) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) NewTableConfiguration(org.apache.accumulo.core.client.admin.NewTableConfiguration) CompactionConfig(org.apache.accumulo.core.client.admin.CompactionConfig) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) CounterSummary(org.apache.accumulo.core.client.summary.CounterSummary) Summary(org.apache.accumulo.core.client.summary.Summary) BatchWriter(org.apache.accumulo.core.client.BatchWriter) SummarizerConfiguration(org.apache.accumulo.core.client.summary.SummarizerConfiguration) Test(org.junit.Test)

Aggregations

SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration): 41, Test (org.junit.Test): 33, HashMap (java.util.HashMap): 28, Key (org.apache.accumulo.core.data.Key): 22, Value (org.apache.accumulo.core.data.Value): 22, Collector (org.apache.accumulo.core.client.summary.Summarizer.Collector): 19, EntryLengthSummarizer (org.apache.accumulo.core.client.summary.summarizers.EntryLengthSummarizer): 16, Summary (org.apache.accumulo.core.client.summary.Summary): 13, NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration): 10, CounterSummary (org.apache.accumulo.core.client.summary.CounterSummary): 10, Connector (org.apache.accumulo.core.client.Connector): 9, BatchWriter (org.apache.accumulo.core.client.BatchWriter): 8, Text (org.apache.hadoop.io.Text): 8, FamilySummarizer (org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer): 7, ArrayList (java.util.ArrayList): 6, BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig): 6, IOException (java.io.IOException): 5, Collection (java.util.Collection): 5, Map (java.util.Map): 5, Entry (java.util.Map.Entry): 5